10 #ifndef TPETRA_CRSMATRIX_DEF_HPP
11 #define TPETRA_CRSMATRIX_DEF_HPP
23 #include "Tpetra_RowMatrix.hpp"
24 #include "Tpetra_LocalCrsMatrixOperator.hpp"
32 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
40 #include "Tpetra_Details_packCrsMatrix.hpp"
41 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
43 #include "Teuchos_FancyOStream.hpp"
44 #include "Teuchos_RCP.hpp"
45 #include "Teuchos_DataAccess.hpp"
46 #include "Teuchos_SerialDenseMatrix.hpp"
47 #include "KokkosBlas1_scal.hpp"
48 #include "KokkosSparse_getDiagCopy.hpp"
49 #include "KokkosSparse_spmv.hpp"
// Applies a binary function to the value stored at `dest` using a
// compare-and-swap retry loop: a candidate result is computed from the value
// assumed to be stored, Kokkos::atomic_compare_exchange tries to install it,
// and the loop repeats while the value read back differs from the assumption.
61 template<
class T,
class BinaryFunction>
62 T atomic_binary_function_update (
volatile T*
const dest,
// Candidate result derived from the value we assume is currently at dest.
76 T newVal = f (assume, inputVal);
// oldVal receives the result of the compare-exchange on dest.
77 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
78 }
// Retry when the exchange observed a value different from the assumed one.
while (assume != oldVal);
// Returns the larger of the magnitudes of x and y, where the magnitude is
// taken from Teuchos::ScalarTraits<Scalar>.
98 template<
class Scalar>
102 typedef Teuchos::ScalarTraits<Scalar> STS;
103 return std::max (STS::magnitude (x), STS::magnitude (y));
// Constructor taking a row Map, an upper bound on the number of entries per
// row, and an optional ParameterList.  It creates a new crs_graph_type from
// (rowMap, maxNumEntriesPerRow, ...).  A std::exception thrown while building
// the graph is reported as std::runtime_error, prefixed with this
// constructor's name (tfecfFuncName).
112 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
113 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
114 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
115 size_t maxNumEntriesPerRow,
116 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the exception macro below to identify this constructor.
119 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
120 "[, RCP<ParameterList>]): ";
121 Teuchos::RCP<crs_graph_type> graph;
123 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
// Translate any graph-construction failure into std::runtime_error.
126 catch (std::exception& e) {
127 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
128 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
129 "size_t [, RCP<ParameterList>]) threw an exception: "
// The static-graph handle refers to the same graph object as myGraph_.
136 staticGraph_ = myGraph_;
// Constructor for the overload that receives a per-row allocation array
// (numEntPerRowToAlloc) and an optional ParameterList.  A std::exception
// raised by the CrsGraph constructor is reported as std::runtime_error with
// this constructor's name as prefix.  The graph built here is stored in
// staticGraph_.
141 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
144 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
145 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the exception macro below to identify this constructor.
148 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
149 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
150 Teuchos::RCP<crs_graph_type> graph;
// Translate any graph-construction failure into std::runtime_error.
156 catch (std::exception& e) {
157 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
158 (
true, std::runtime_error,
"CrsGraph constructor "
159 "(RCP<const Map>, ArrayView<const size_t>"
160 "[, RCP<ParameterList>]) threw an exception: "
167 staticGraph_ = graph;
// Constructor for the overload that receives a column Map, an upper bound on
// entries per row (maxNumEntPerRow), and an optional ParameterList.
//
// Entry checks: both staticGraph_ and myGraph_ must be null; otherwise
// std::logic_error is raised, since that would be an internal inconsistency.
// A std::exception from the CrsGraph constructor is reported as
// std::runtime_error, with e.what() appended to the message.
172 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
175 const Teuchos::RCP<const map_type>& colMap,
176 const size_t maxNumEntPerRow,
177 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the exception macro to identify this constructor.
180 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
181 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
// Text for messages that indicate an internal bug.
182 const char suffix[] =
183 " Please report this bug to the Tpetra developers.";
// Neither graph handle may be set before this constructor creates the graph.
186 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
187 (! staticGraph_.is_null (), std::logic_error,
188 "staticGraph_ is not null at the beginning of the constructor."
190 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
191 (! myGraph_.is_null (), std::logic_error,
192 "myGraph_ is not null at the beginning of the constructor."
194 Teuchos::RCP<crs_graph_type> graph;
// Translate any graph-construction failure into std::runtime_error.
200 catch (std::exception& e) {
201 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
202 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
203 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
204 "exception: " << e.what ());
// The static-graph handle refers to the same graph object as myGraph_.
210 staticGraph_ = myGraph_;
// Constructor for the overload that receives a column Map, a per-row
// allocation array (numEntPerRowToAlloc), and an optional ParameterList.
// A std::exception from the CrsGraph constructor is reported as
// std::runtime_error, with e.what() appended to the message.  The graph built
// here is stored in staticGraph_.
215 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
218 const Teuchos::RCP<const map_type>& colMap,
219 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
220 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the exception macro below to identify this constructor.
223 const char tfecfFuncName[] =
224 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
225 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
226 Teuchos::RCP<crs_graph_type> graph;
// Translate any graph-construction failure into std::runtime_error.
232 catch (std::exception& e) {
233 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
234 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
235 "RCP<const Map>, ArrayView<const size_t>[, "
236 "RCP<ParameterList>]) threw an exception: " << e.what ());
242 staticGraph_ = graph;
// Constructor that uses an existing, const CrsGraph as the matrix's graph.
//
// The graph is stored in staticGraph_ and the storage status starts as
// STORAGE_1D_PACKED.  The ParameterList argument is unnamed, so it is not
// used in the body.
//
// Raises std::runtime_error if `graph` is null or is not fill complete.
// Value storage is created with one entry per packed local index of the
// graph, and the unpacked values handle is set from the packed one.
248 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
250 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
251 const Teuchos::RCP<Teuchos::ParameterList>& ) :
253 staticGraph_ (graph),
254 storageStatus_ (Details::STORAGE_1D_PACKED)
257 typedef typename local_matrix_device_type::values_type values_type;
// Prefix used by the exception macro to identify this constructor.
258 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
259 "RCP<ParameterList>]): ";
// Diagnostic prefix; each message is formatted into a string first and then
// written to std::cerr in one insertion.
262 std::unique_ptr<std::string> prefix;
264 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
265 std::ostringstream os;
266 os << *prefix <<
"Start" << endl;
267 std::cerr << os.str ();
// Input validation: the graph must exist and be fill complete.
270 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
271 (graph.is_null (), std::runtime_error,
"Input graph is null.");
272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
273 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
274 "is not fill complete. You must call fillComplete on the "
275 "graph before using it to construct a CrsMatrix. Note that "
276 "calling resumeFill on the graph makes it not fill complete, "
277 "even if you had previously called fillComplete. In that "
278 "case, you must call fillComplete on the graph again.");
// Number of values to allocate = length of the graph's packed local indices.
286 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
288 std::ostringstream os;
289 os << *prefix <<
"Allocate values: " << numEnt << endl;
290 std::cerr << os.str ();
293 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
// Packed and unpacked value handles are made equal.
295 valuesUnpacked_wdv = valuesPacked_wdv;
300 std::ostringstream os;
301 os << *prefix <<
"Done" << endl;
302 std::cerr << os.str ();
// Constructor that pairs the values of an existing `matrix` with a given
// const CrsGraph.  The graph is stored in staticGraph_ and the storage status
// is copied from `matrix`.
//
// Raises std::runtime_error if `graph` is null or is not fill complete.
// The packed and unpacked value handles are built from the corresponding
// handles of `matrix`, using offset 0 and the lengths of the graph's packed
// and unpacked local index arrays.
// NOTE(review): presumably values_wdv_type(src, 0, n) refers to the first n
// entries of src rather than copying them -- confirm against values_wdv_type.
306 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
309 const Teuchos::RCP<const crs_graph_type>& graph,
310 const Teuchos::RCP<Teuchos::ParameterList>& params) :
312 staticGraph_ (graph),
313 storageStatus_ (matrix.storageStatus_)
// NOTE(review): this prefix names a values_type argument, but the body reads
// its values from `matrix`; it looks copied from the (graph, values)
// constructor -- confirm and correct the message if so.
315 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
316 "local_matrix_device_type::values_type, "
317 "[,RCP<ParameterList>]): ";
// Input validation: the graph must exist and be fill complete.
318 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
319 (graph.is_null (), std::runtime_error,
"Input graph is null.");
320 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
321 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
322 "is not fill complete. You must call fillComplete on the "
323 "graph before using it to construct a CrsMatrix. Note that "
324 "calling resumeFill on the graph makes it not fill complete, "
325 "even if you had previously called fillComplete. In that "
326 "case, you must call fillComplete on the graph again.");
// Packed values: sized by the graph's packed local indices.
328 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
329 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
// Unpacked values: sized by the graph's unpacked local indices.
331 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
332 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
// Constructor that uses an existing, const CrsGraph together with a
// caller-provided values array.  The graph is stored in staticGraph_ and the
// storage status starts as STORAGE_1D_PACKED.  The ParameterList argument is
// unnamed, so it is not used in the body.
//
// Raises std::runtime_error if `graph` is null or is not fill complete.
338 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
340 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
341 const typename local_matrix_device_type::values_type& values,
342 const Teuchos::RCP<Teuchos::ParameterList>& ) :
344 staticGraph_ (graph),
345 storageStatus_ (Details::STORAGE_1D_PACKED)
// Prefix used by the exception macro to identify this constructor.
347 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
348 "local_matrix_device_type::values_type, "
349 "[,RCP<ParameterList>]): ";
// Input validation: the graph must exist and be fill complete.
350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
351 (graph.is_null (), std::runtime_error,
"Input graph is null.");
352 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
353 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
354 "is not fill complete. You must call fillComplete on the "
355 "graph before using it to construct a CrsMatrix. Note that "
356 "calling resumeFill on the graph makes it not fill complete, "
357 "even if you had previously called fillComplete. In that "
358 "case, you must call fillComplete on the graph again.");
// Packed and unpacked value handles are made equal.
367 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor from Maps plus raw compressed-row arrays: row offsets
// (rowPointers), column indices (columnIndices), and values.  The storage
// status starts as STORAGE_1D_PACKED.
//
// Validation performed in the visible code:
//  * values and columnIndices must have equal length (std::invalid_argument);
//  * in debug mode, when rowPointers is non-empty, its last entry must equal
//    the lengths of both columnIndices and values (std::invalid_argument);
//  * after the CrsGraph is built from (rowMap, colMap, rowPointers,
//    columnIndices, params), the extents of its local graph must match the
//    inputs (std::logic_error, flagged as an internal bug).
// A std::exception from the CrsGraph constructor is reported as
// std::runtime_error.
378 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
381 const Teuchos::RCP<const map_type>& colMap,
382 const typename local_graph_device_type::row_map_type& rowPointers,
383 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
384 const typename local_matrix_device_type::values_type& values,
385 const Teuchos::RCP<Teuchos::ParameterList>& params) :
387 storageStatus_ (Details::STORAGE_1D_PACKED)
389 using Details::getEntryOnHost;
// Prefix used by the exception macro to identify this constructor.
392 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
393 "RCP<const Map>, ptr, ind, val[, params]): ";
// Text appended to messages that indicate an internal bug.
394 const char suffix[] =
395 ". Please report this bug to the Tpetra developers.";
// Diagnostic prefix; each message is formatted into a string first and then
// written to std::cerr in one insertion.
399 std::unique_ptr<std::string> prefix;
401 prefix = this->createPrefix(
402 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
403 std::ostringstream os;
404 os << *prefix <<
"Start" << endl;
405 std::cerr << os.str ();
// There must be exactly one value per column index.
412 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
413 (values.extent(0) != columnIndices.extent(0),
414 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
415 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
// Debug-only check: the last row offset is read via getEntryOnHost and
// compared against the lengths of the index and value arrays.
417 if (debug && rowPointers.extent(0) != 0) {
418 const size_t numEnt =
419 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
420 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
421 (numEnt !=
size_t(columnIndices.extent(0)) ||
422 numEnt !=
size_t(values.extent(0)),
423 std::invalid_argument,
"Last entry of rowPointers says that "
424 "the matrix has " << numEnt <<
" entr"
425 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
426 "columnIndices and values don't match this. "
427 "columnIndices.extent(0)=" << columnIndices.extent (0)
428 <<
" and values.extent(0)=" << values.extent (0) <<
".");
// Build the graph from the same Maps and structure arrays.
431 RCP<crs_graph_type> graph;
433 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
434 columnIndices, params));
// Translate any graph-construction failure into std::runtime_error.
436 catch (std::exception& e) {
437 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
438 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
439 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
// Sanity checks on the graph that was just built: its local row offsets and
// entries must have the same extents as the inputs, and the entries must
// have the same extent as the values.
447 auto lclGraph = graph->getLocalGraphDevice ();
448 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
449 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
450 lclGraph.entries.extent (0) != columnIndices.extent (0),
451 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
452 "ind[, params]) did not set the local graph correctly." << suffix);
453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
454 (lclGraph.entries.extent (0) != values.extent (0),
455 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
456 "params]) did not set the local graph correctly. "
457 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
458 <<
" != values.extent(0) = " << values.extent (0) << suffix);
464 staticGraph_ = graph;
// Packed and unpacked value handles are made equal.
474 valuesUnpacked_wdv = valuesPacked_wdv;
483 std::ostringstream os;
484 os << *prefix <<
"Done" << endl;
485 std::cerr << os.str();
// Constructor from Maps plus compressed-row arrays supplied as
// Teuchos::ArrayRCP objects: row offsets (ptr), local column indices (ind),
// and values (val).  The storage status starts as STORAGE_1D_PACKED.
//
// A std::exception from the CrsGraph constructor is reported as
// std::runtime_error, with e.what() appended.  After the graph is stored, the
// extents of its local graph are compared against ptr.size() and ind.size();
// a mismatch raises std::logic_error.
489 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
492 const Teuchos::RCP<const map_type>& colMap,
493 const Teuchos::ArrayRCP<size_t>& ptr,
494 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
495 const Teuchos::ArrayRCP<Scalar>& val,
496 const Teuchos::RCP<Teuchos::ParameterList>& params) :
498 storageStatus_ (Details::STORAGE_1D_PACKED)
500 using Kokkos::Compat::getKokkosViewDeepCopy;
501 using Teuchos::av_reinterpret_cast;
503 using values_type =
typename local_matrix_device_type::values_type;
// Prefix used by the exception macro to identify this constructor.
505 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
506 "RCP<const Map>, ptr, ind, val[, params]): ";
508 RCP<crs_graph_type> graph;
// Translate any graph-construction failure into std::runtime_error.
513 catch (std::exception& e) {
514 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
515 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
516 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
517 "RCP<ParameterList>]) threw an exception: " << e.what ());
523 staticGraph_ = graph;
// Sanity check: the local graph must have as many row offsets and entries
// as the input arrays.
536 auto lclGraph = staticGraph_->getLocalGraphDevice ();
537 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
538 (
size_t (lclGraph.row_map.extent (0)) !=
size_t (ptr.size ()) ||
539 size_t (lclGraph.entries.extent (0)) !=
size_t (ind.size ()),
540 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
541 "ptr, ind[, params]) did not set the local graph correctly. "
542 "Please report this bug to the Tpetra developers.");
// The input values are reinterpreted as IST and passed to
// getKokkosViewDeepCopy for device_type.
545 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
// Packed and unpacked value handles are made equal.
547 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor from Maps and a local matrix (lclMatrix).  The CrsGraph is
// built from the local matrix's graph (lclMatrix.graph) and params; the
// storage status starts as STORAGE_1D_PACKED.
//
// A std::exception from the CrsGraph constructor is reported as
// std::runtime_error.  The resulting graph must be fill complete, otherwise
// std::logic_error is raised.  The final two checks report the cases where
// the finished matrix is still fill-active or is not fill-complete.
557 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
560 const Teuchos::RCP<const map_type>& colMap,
562 const Teuchos::RCP<Teuchos::ParameterList>& params) :
564 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix used by the exception macro to identify this constructor.
567 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
568 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
// Text appended to messages that indicate an internal bug.
569 const char suffix[] =
570 " Please report this bug to the Tpetra developers.";
572 Teuchos::RCP<crs_graph_type> graph;
575 lclMatrix.graph, params));
// Translate any graph-construction failure into std::runtime_error.
577 catch (std::exception& e) {
578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
579 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
580 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
581 "exception: " << e.what ());
// The graph constructor used here is expected to yield a fill-complete graph.
583 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
584 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
585 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
586 "did not produce a fill-complete graph. Please report this bug to the "
587 "Tpetra developers.");
592 staticGraph_ = graph;
// Packed and unpacked value handles are made equal.
595 valuesUnpacked_wdv = valuesPacked_wdv;
// Final state checks on the constructed matrix.
597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
599 "At the end of a CrsMatrix constructor that should produce "
600 "a fillComplete matrix, isFillActive() is true." << suffix);
601 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
603 "CrsMatrix constructor that should produce a fillComplete "
604 "matrix, isFillComplete() is false." << suffix);
// Constructor from a local matrix plus explicit row, column, domain, and
// range Maps.  The CrsGraph is built from (lclMatrix.graph, rowMap, colMap,
// domainMap, rangeMap, params); the storage status starts as
// STORAGE_1D_PACKED.
//
// A std::exception from the CrsGraph constructor is reported as
// std::runtime_error.  The resulting graph must be fill complete, otherwise
// std::logic_error is raised.  The final two checks report the cases where
// the finished matrix is still fill-active or is not fill-complete.
608 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
611 const Teuchos::RCP<const map_type>& rowMap,
612 const Teuchos::RCP<const map_type>& colMap,
613 const Teuchos::RCP<const map_type>& domainMap,
614 const Teuchos::RCP<const map_type>& rangeMap,
615 const Teuchos::RCP<Teuchos::ParameterList>& params) :
617 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix used by the exception macro to identify this constructor.
620 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
621 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
622 "local_matrix_device_type[, RCP<ParameterList>]): ";
// Text appended to messages that indicate an internal bug.
623 const char suffix[] =
624 " Please report this bug to the Tpetra developers.";
626 Teuchos::RCP<crs_graph_type> graph;
628 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
629 domainMap, rangeMap, params));
// Translate any graph-construction failure into std::runtime_error.
631 catch (std::exception& e) {
632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
633 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
634 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
635 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The graph constructor used here is expected to yield a fill-complete graph.
637 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
638 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
639 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
640 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
641 "not produce a fillComplete graph." << suffix);
646 staticGraph_ = graph;
// Packed and unpacked value handles are made equal.
649 valuesUnpacked_wdv = valuesPacked_wdv;
// Final state checks on the constructed matrix.
651 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
653 "At the end of a CrsMatrix constructor that should produce "
654 "a fillComplete matrix, isFillActive() is true." << suffix);
655 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
657 "CrsMatrix constructor that should produce a fillComplete "
658 "matrix, isFillComplete() is false." << suffix);
// Constructor from a local matrix plus explicit row, column, domain, and
// range Maps and caller-provided Import and Export objects.  The storage
// status starts as STORAGE_1D_PACKED.
//
// A std::exception from the CrsGraph constructor is reported as
// std::runtime_error.  The resulting graph must be fill complete, otherwise
// std::logic_error is raised.  The final two checks report the cases where
// the finished matrix is still fill-active or is not fill-complete.
662 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
665 const Teuchos::RCP<const map_type>& rowMap,
666 const Teuchos::RCP<const map_type>& colMap,
667 const Teuchos::RCP<const map_type>& domainMap,
668 const Teuchos::RCP<const map_type>& rangeMap,
669 const Teuchos::RCP<const import_type>& importer,
670 const Teuchos::RCP<const export_type>& exporter,
671 const Teuchos::RCP<Teuchos::ParameterList>& params) :
673 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix used by the exception macro to identify this constructor.
677 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
678 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
// Text appended to messages that indicate an internal bug.
679 const char suffix[] =
680 " Please report this bug to the Tpetra developers.";
682 Teuchos::RCP<crs_graph_type> graph;
685 domainMap, rangeMap, importer,
// Translate any graph-construction failure into std::runtime_error.
688 catch (std::exception& e) {
689 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
690 (
true, std::runtime_error,
"CrsGraph constructor "
691 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
692 "params) threw: " << e.what ());
// The graph constructor used here is expected to yield a fill-complete graph.
694 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
695 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
696 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
697 "Export, params) did not produce a fill-complete graph. "
698 "Please report this bug to the Tpetra developers.");
703 staticGraph_ = graph;
// Packed and unpacked value handles are made equal.
706 valuesUnpacked_wdv = valuesPacked_wdv;
// Final state checks on the constructed matrix.
708 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
710 "At the end of a CrsMatrix constructor that should produce "
711 "a fillComplete matrix, isFillActive() is true." << suffix);
712 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
714 "CrsMatrix constructor that should produce a fillComplete "
715 "matrix, isFillComplete() is false." << suffix);
// Constructor that creates a matrix from an existing one (`source`), with
// `copyOrView` selecting between Teuchos::Copy and Teuchos::View handling of
// the values.  The new matrix takes the source's CrsGraph as its static
// graph and copies the source's storage status.
//
// The source graph must be fill complete (see the message below).  A value of
// `copyOrView` other than Teuchos::Copy or Teuchos::View is reported as
// std::invalid_argument.
719 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
722 const Teuchos::DataAccess copyOrView):
724 staticGraph_ (source.getCrsGraph()),
725 storageStatus_ (source.storageStatus_)
// Prefix used by the exception macro to identify this constructor.
727 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
728 "const CrsMatrix&, const Teuchos::DataAccess): ";
729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
731 "Source graph must be fillComplete().");
// Copy mode: new value storage labeled "val" is allocated without
// initialization.
733 if (copyOrView == Teuchos::Copy) {
734 using values_type =
typename local_matrix_device_type::values_type;
736 using Kokkos::view_alloc;
737 using Kokkos::WithoutInitializing;
738 values_type newvals (view_alloc (
"val", WithoutInitializing),
743 valuesUnpacked_wdv = valuesPacked_wdv;
746 else if (copyOrView == Teuchos::View) {
// Report a copyOrView value that is neither Copy nor View.
752 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
753 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
754 "has an invalid value " << copyOrView <<
". Valid values "
755 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
756 "Teuchos::View = " << Teuchos::View <<
".");
// Exchanges state between this matrix and crs_matrix, member by member, using
// std::swap.  The members swapped in the visible code are importMV_,
// exportMV_, both graph handles (staticGraph_, myGraph_), both value handles
// (valuesPacked_wdv, valuesUnpacked_wdv), and nonlocals_.
761 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
766 std::swap(crs_matrix.
importMV_, this->importMV_);
767 std::swap(crs_matrix.
exportMV_, this->exportMV_);
768 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
769 std::swap(crs_matrix.myGraph_, this->myGraph_);
770 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
771 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
774 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
// Returns the communicator reported by the matrix's graph
// (getCrsGraphRef().getComm()).
777 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
778 Teuchos::RCP<const Teuchos::Comm<int> >
781 return getCrsGraphRef ().getComm ();
// Returns the current value of the fillComplete_ flag.
784 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
788 return fillComplete_;
// Returns the negation of the fillComplete_ flag.
791 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
795 return ! fillComplete_;
// Forwards to the graph: returns getCrsGraphRef().isStorageOptimized().
798 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
802 return this->getCrsGraphRef ().isStorageOptimized ();
// Forwards to the graph: returns getCrsGraphRef().isLocallyIndexed().
805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
809 return getCrsGraphRef ().isLocallyIndexed ();
// Forwards to the graph: returns getCrsGraphRef().isGloballyIndexed().
812 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
816 return getCrsGraphRef ().isGloballyIndexed ();
// Forwards to the graph: returns getCrsGraphRef().hasColMap().
819 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
823 return getCrsGraphRef ().hasColMap ();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumEntries().
826 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
830 return getCrsGraphRef ().getGlobalNumEntries ();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumEntries().
833 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
837 return getCrsGraphRef ().getLocalNumEntries ();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumRows().
840 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
844 return getCrsGraphRef ().getGlobalNumRows ();
// Forwards to the graph: returns getCrsGraphRef().getGlobalNumCols().
847 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
851 return getCrsGraphRef ().getGlobalNumCols ();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumRows().
854 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
858 return getCrsGraphRef ().getLocalNumRows ();
// Forwards to the graph: returns getCrsGraphRef().getLocalNumCols().
862 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
866 return getCrsGraphRef ().getLocalNumCols ();
// Forwards to the graph: returns the graph's entry count for the row given
// by `globalRow`.
870 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
874 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
// Forwards to the graph: returns the graph's entry count for the row given
// by `localRow`.
877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
881 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
// Forwards to the graph: returns getCrsGraphRef().getGlobalMaxNumRowEntries().
884 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
// Forwards to the graph: returns getCrsGraphRef().getLocalMaxNumRowEntries().
891 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
895 return getCrsGraphRef ().getLocalMaxNumRowEntries ();
// Returns the index base reported by the row Map (getRowMap()->getIndexBase()).
898 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
902 return getRowMap ()->getIndexBase ();
// Returns the row Map held by the matrix's graph.
905 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
906 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
909 return getCrsGraphRef ().getRowMap ();
// Returns the column Map held by the matrix's graph.
912 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
913 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
916 return getCrsGraphRef ().getColMap ();
// Returns the domain Map held by the matrix's graph.
919 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
920 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
923 return getCrsGraphRef ().getDomainMap ();
// Returns the range Map held by the matrix's graph.
926 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
927 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
930 return getCrsGraphRef ().getRangeMap ();
// Returns the matrix's graph through the RowGraph interface
// (RCP<const RowGraph>).  The visible branch is taken when staticGraph_ is
// non-null.
933 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
934 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
937 if (staticGraph_ != Teuchos::null) {
// Returns the matrix's graph as an RCP<const CrsGraph>.  The visible branch
// is taken when staticGraph_ is non-null.
943 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
944 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
947 if (staticGraph_ != Teuchos::null) {
// Returns a reference to the matrix's graph.  staticGraph_ is preferred when
// it is non-null; otherwise myGraph_ is dereferenced.  If myGraph_ is null as
// well, std::logic_error is raised, since having both handles null is treated
// as an internal bug.
953 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// `debug` is a compile-time constant selected by HAVE_TPETRA_DEBUG.
958 #ifdef HAVE_TPETRA_DEBUG
959 constexpr
bool debug =
true;
961 constexpr
bool debug =
false;
962 #endif // HAVE_TPETRA_DEBUG
964 if (! this->staticGraph_.is_null ()) {
965 return * (this->staticGraph_);
// Prefix used by the exception macro to identify this method.
969 const char tfecfFuncName[] =
"getCrsGraphRef: ";
970 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
971 (this->myGraph_.is_null (), std::logic_error,
972 "Both staticGraph_ and myGraph_ are null. "
973 "Please report this bug to the Tpetra developers.");
975 return * (this->myGraph_);
// Builds a local_matrix_device_type from the packed values (device view,
// requested with Access::ReadWrite) and the static graph's local device
// graph.  numCols is the local element count of the graph's column Map.
979 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
980 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
984 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
987 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
988 staticGraph_->getLocalGraphDevice());
// Builds a local_matrix_host_type labeled "Tpetra::CrsMatrix::lclMatrixHost"
// from the packed values (host view, requested with Access::ReadWrite) and
// the static graph's local host graph.  numCols is the local element count of
// the graph's column Map.
991 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
992 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
996 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
997 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
998 valuesPacked_wdv.getHostView(Access::ReadWrite),
999 staticGraph_->getLocalGraphHost());
// Only compiled when KOKKOSKERNELS_VERSION < 40299.
// Creates a shared local_multiply_op_type wrapping a copy of the local device
// matrix.  When one of the listed sparse TPLs (cuSPARSE, rocSPARSE, MKL) is
// enabled and the local entry count fits in LocalOrdinal, the operator is
// additionally given `ordinalRowptrs`, an array holding the row offsets
// converted element by element; that array is built only if it has not been
// allocated yet.
1002 #if KOKKOSKERNELS_VERSION < 40299
1004 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1005 std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1009 auto localMatrix = getLocalMatrixDevice();
1010 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
// The converted offsets are only usable if every offset fits in LocalOrdinal.
1011 if(this->getLocalNumEntries() <=
size_t(Teuchos::OrdinalTraits<LocalOrdinal>::max()))
// Build the converted row offsets once; a null data pointer means they have
// not been created.
1013 if(this->ordinalRowptrs.data() ==
nullptr)
1015 auto originalRowptrs = localMatrix.graph.row_map;
1018 this->ordinalRowptrs = ordinal_rowptrs_type(
1019 Kokkos::ViewAllocateWithoutInitializing(
"CrsMatrix::ordinalRowptrs"), originalRowptrs.extent(0));
// Local copy of the handle for use inside the lambda below.
1020 auto ordinalRowptrs_ = this->ordinalRowptrs;
1021 Kokkos::parallel_for(
"CrsMatrix::getLocalMultiplyOperator::convertRowptrs",
1022 Kokkos::RangePolicy<execution_space>(0, originalRowptrs.extent(0)),
1023 KOKKOS_LAMBDA(LocalOrdinal i)
1025 ordinalRowptrs_(i) = originalRowptrs(i);
1029 return std::make_shared<local_multiply_op_type>(
1030 std::make_shared<local_matrix_device_type>(localMatrix), this->ordinalRowptrs);
// Variant that constructs the operator from the local matrix alone.
1034 return std::make_shared<local_multiply_op_type>(
1035 std::make_shared<local_matrix_device_type>(localMatrix));
// Returns true exactly when myGraph_ is null.
1039 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1043 return myGraph_.is_null ();
1046 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1053 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Allocates the matrix's value storage to match the graph's allocation.
//
// Inputs used in the visible code:
//   lg  - LocalIndices or GlobalIndices; forwarded to the graph's
//         allocateIndices when the graph still needs allocation.
//   gas - GraphAlreadyAllocated or GraphNotYetAllocated; the caller's claim
//         about whether the graph's indices are already allocated.
//
// Checks (all reported as std::logic_error with the "report this bug"
// suffix): staticGraph_ must be non-null; `gas` must agree with
// staticGraph_->indicesAreAllocated(); if the indices are not allocated, the
// matrix must own the graph (myGraph_ non-null); the final entry of the
// unpacked host row offsets must equal the graph's local allocation size.
// Exceptions thrown by CrsGraph::allocateIndices are reported as
// std::runtime_error.
1060 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Prefix used by the exception macro to identify this method.
1069 const char tfecfFuncName[] =
"allocateValues: ";
// Text appended to messages that indicate an internal bug.
1070 const char suffix[] =
1071 " Please report this bug to the Tpetra developers.";
1072 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
// Diagnostic output: records the lg and gas arguments; the message is
// formatted into a string first and then written to std::cerr.
1074 std::unique_ptr<std::string> prefix;
1076 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1077 std::ostringstream os;
1078 os << *prefix <<
"lg: "
1079 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1081 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1082 <<
"Allocated" << endl;
1083 std::cerr << os.str();
// Debug behavior is queried at run time for the "CrsMatrix" label.
1086 const bool debug = Behavior::debug(
"CrsMatrix");
1088 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1089 (this->staticGraph_.is_null (), std::logic_error,
1090 "staticGraph_ is null." << suffix);
// The caller's claim (gas) must match the graph's own allocation state.  The
// message pieces err1/err2/err3 are combined with "not " in the appropriate
// position for each direction of the mismatch.
1095 if ((gas == GraphAlreadyAllocated) !=
1096 staticGraph_->indicesAreAllocated ()) {
1097 const char err1[] =
"The caller has asserted that the graph "
1099 const char err2[] =
"already allocated, but the static graph "
1100 "says that its indices are ";
1101 const char err3[] =
"already allocated. ";
1102 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1103 (gas == GraphAlreadyAllocated &&
1104 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1105 err1 << err2 <<
"not " << err3 << suffix);
1106 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1107 (gas != GraphAlreadyAllocated &&
1108 staticGraph_->indicesAreAllocated (), std::logic_error,
1109 err1 <<
"not " << err2 << err3 << suffix);
// Unallocated indices are only acceptable when the matrix owns the graph.
1117 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1118 (! this->staticGraph_->indicesAreAllocated () &&
1119 this->myGraph_.is_null (), std::logic_error,
1120 "The static graph says that its indices are not allocated, "
1121 "but the graph is not owned by the matrix." << suffix);
// Allocate the graph's indices first when the caller says they are missing.
1124 if (gas == GraphNotYetAllocated) {
1126 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1127 (this->myGraph_.is_null (), std::logic_error,
1128 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1131 this->myGraph_->allocateIndices (lg, verbose);
// Any failure in allocateIndices is reported as std::runtime_error.
1133 catch (std::exception& e) {
1134 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1135 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1136 "threw an exception: " << e.what ());
1139 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1140 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1141 "threw an exception not a subclass of std::exception.");
// Number of value slots to create = the graph's local allocation size.
1146 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
1148 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
// NOTE(review): the message below spells "rowPtrsUnapcked_host"; this looks
// like a typo for "rowPtrsUnpacked_host" -- confirm before changing the
// runtime string.
1149 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1150 (this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1151 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnapcked_host." << suffix);
1155 using values_type =
typename local_matrix_device_type::values_type;
1157 std::ostringstream os;
1158 os << *prefix <<
"Allocate values_wdv: Pre "
1159 << valuesUnpacked_wdv.extent(0) <<
", post "
1160 << lclTotalNumEntries << endl;
1161 std::cerr << os.str();
// New value storage with lclTotalNumEntries entries.
1165 values_type(
"Tpetra::CrsMatrix::values",
1166 lclTotalNumEntries));
// fillLocalGraphAndMatrix: per the message strings below, this is called
// from fillComplete or expertStaticFillComplete.
// NOTE(review): the signature lines are not visible in this excerpt; the
// name is taken from the label and tfecfFuncName strings.
//
// What the visible code does:
//  - Requires a non-null myGraph_ (the matrix must own a nonconst graph).
//  - Validates the unpacked row offsets against lclNumRows + 1 and against
//    the extent of the graph's unpacked column indices.
//  - If the graph's local entry count differs from its allocation size,
//    allocates packed row offsets, column indices and values, and copies
//    the used entries with two pack_functor kernels.
//  - In the "Storage already packed" path, the packed members simply alias
//    the unpacked ones.
//  - If optimized storage is requested, frees k_numRowEntries_, points the
//    unpacked members at the packed ones, and marks both graph and matrix
//    as STORAGE_1D_PACKED.
1170 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1176 using ::Tpetra::Details::getEntryOnHost;
1177 using Teuchos::arcp_const_cast;
1178 using Teuchos::Array;
1179 using Teuchos::ArrayRCP;
1180 using Teuchos::null;
1184 using row_map_type =
typename local_graph_device_type::row_map_type;
1185 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1186 using values_type =
typename local_matrix_device_type::values_type;
1188 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
1190 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1191 "fillComplete or expertStaticFillComplete): ";
1192 const char suffix[] =
1193 " Please report this bug to the Tpetra developers.";
// Prefix string used only for the diagnostic output written to std::cerr.
1197 std::unique_ptr<std::string> prefix;
1199 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1200 std::ostringstream os;
1201 os << *prefix << endl;
1202 std::cerr << os.str ();
// A null myGraph_ means the matrix has a const ("static") graph, which
// this function does not handle.
1208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1209 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1210 "(myGraph_) is null. This means that the matrix has a "
1211 "const (a.k.a. \"static\") graph. fillComplete or "
1212 "expertStaticFillComplete should never call "
1213 "fillLocalGraphAndMatrix in that case." << suffix);
1216 const size_t lclNumRows = this->getLocalNumRows ();
// Row offsets of the current (unpacked) storage, taken from the graph.
1231 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1232 myGraph_->rowPtrsUnpacked_dev_;
// Sanity checks: the offsets array must exist and have lclNumRows + 1 entries.
1235 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1236 (curRowOffsets.extent (0) == 0, std::logic_error,
1237 "curRowOffsets.extent(0) == 0.");
1238 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1239 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1240 "curRowOffsets.extent(0) = "
1241 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1242 << (lclNumRows + 1) <<
".");
// The last row offset must equal the number of unpacked column indices.
1243 const size_t numOffsets = curRowOffsets.extent (0);
1244 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1245 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1247 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1248 std::logic_error,
"numOffsets = " <<
1249 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1250 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1251 << numOffsets <<
") = " << valToCheck <<
".");
// Entry count != allocation size: the unpacked storage has unused slots,
// so packed copies of the offsets, indices and values are built below.
1254 if (myGraph_->getLocalNumEntries() !=
1255 myGraph_->getLocalAllocationSize()) {
1259 typename row_map_type::non_const_type k_ptrs;
1260 row_map_type k_ptrs_const;
1261 lclinds_1d_type k_inds;
1265 std::ostringstream os;
1266 const auto numEnt = myGraph_->getLocalNumEntries();
1267 const auto allocSize = myGraph_->getLocalAllocationSize();
1268 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1269 <<
", allocSize=" << allocSize << endl;
1270 std::cerr << os.str ();
// Debug-only: before packing, the final row offset must match the extents
// of both the unpacked values and the unpacked column indices.
1278 if (debug && curRowOffsets.extent (0) != 0) {
1279 const size_t numOffsets =
1280 static_cast<size_t> (curRowOffsets.extent (0));
1281 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1282 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1283 (static_cast<size_t> (valToCheck) !=
1284 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1285 std::logic_error,
"(unpacked branch) Before "
1286 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1287 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1288 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1289 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1290 (static_cast<size_t> (valToCheck) !=
1291 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1292 std::logic_error,
"(unpacked branch) Before "
1293 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1294 <<
") = " << valToCheck
1295 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1296 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
1304 size_t lclTotalNumEntries = 0;
1310 std::ostringstream os;
1311 os << *prefix <<
"Allocate packed row offsets: "
1312 << (lclNumRows+1) << endl;
1313 std::cerr << os.str ();
// New packed row-offset array with one entry per row plus one.
1315 typename row_map_type::non_const_type
1316 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1317 typename row_entries_type::const_type numRowEnt_h =
1318 myGraph_->k_numRowEntries_;
// NOTE(review): the right-hand side that computes lclTotalNumEntries is not
// visible in this excerpt.
1321 lclTotalNumEntries =
1325 k_ptrs = packedRowOffsets;
1326 k_ptrs_const = k_ptrs;
1330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1331 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1333 "(unpacked branch) After packing k_ptrs, "
1334 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1335 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1336 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1337 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1338 (valToCheck != lclTotalNumEntries, std::logic_error,
1339 "(unpacked branch) After filling k_ptrs, "
1340 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1341 <<
" != total number of entries on the calling process = "
1342 << lclTotalNumEntries <<
".");
1347 std::ostringstream os;
1348 os << *prefix <<
"Allocate packed local column indices: "
1349 << lclTotalNumEntries << endl;
1350 std::cerr << os.str ();
// Packed column-index and value arrays, each sized to the total entry count.
1352 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1354 std::ostringstream os;
1355 os << *prefix <<
"Allocate packed values: "
1356 << lclTotalNumEntries << endl;
1357 std::cerr << os.str ();
1359 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
// Pack the column indices: the functor reads the unpacked indices together
// with the old offsets (curRowOffsets) and the new offsets (k_ptrs).
1371 using inds_packer_type = pack_functor<
1372 typename Graph::local_graph_device_type::entries_type::non_const_type,
1373 typename Graph::local_inds_dualv_type::t_dev::const_type,
1374 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1375 typename Graph::local_graph_device_type::row_map_type>;
1376 inds_packer_type indsPacker (
1378 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1379 k_ptrs, curRowOffsets);
1381 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1382 Kokkos::parallel_for
1383 (
"Tpetra::CrsMatrix pack column indices",
1384 range_type (0, lclNumRows), indsPacker);
// Pack the values with the same pair of offset arrays.
1388 using vals_packer_type = pack_functor<
1389 typename values_type::non_const_type,
1390 typename values_type::const_type,
1391 typename row_map_type::non_const_type,
1392 typename row_map_type::const_type>;
1393 vals_packer_type valsPacker (
1395 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1396 k_ptrs, curRowOffsets);
1397 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1398 range_type (0, lclNumRows), valsPacker);
// Post-packing consistency checks on k_ptrs, k_vals and k_inds.
1401 const char myPrefix[] =
"(\"Optimize Storage\""
1402 "=true branch) After packing, ";
1403 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1404 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1405 <<
"k_ptrs.extent(0) = 0. This probably means that "
1406 "rowPtrsUnpacked_ was never allocated.");
1407 if (k_ptrs.extent (0) != 0) {
1408 const size_t numOffsets (k_ptrs.extent (0));
1409 const auto valToCheck =
1410 getEntryOnHost (k_ptrs, numOffsets - 1);
1411 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1412 (
size_t (valToCheck) != k_vals.extent (0),
1413 std::logic_error, myPrefix <<
1414 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1415 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1416 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1417 (
size_t (valToCheck) != k_inds.extent (0),
1418 std::logic_error, myPrefix <<
1419 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1420 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
// Hand the packed row offsets to the graph.
1424 myGraph_->setRowPtrsPacked(k_ptrs_const);
1425 myGraph_->lclIndsPacked_wdv =
// "Storage already packed" path: the packed members alias the unpacked ones.
1432 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1433 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1434 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1435 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1436 valuesPacked_wdv = valuesUnpacked_wdv;
1439 std::ostringstream os;
1440 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1441 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1442 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1443 << valuesUnpacked_wdv.extent(0) << endl;
1444 std::cerr << os.str();
1448 const char myPrefix[] =
1449 "(\"Optimize Storage\"=false branch) ";
1450 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1452 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1453 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1454 "that rowPtrsUnpacked_ was never allocated.");
1455 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1456 const size_t numOffsets = rowPtrsUnpackedHost.extent (0);
1457 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
1458 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1459 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1460 std::logic_error, myPrefix <<
1461 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1462 <<
" != valuesPacked_wdv.extent(0) = "
1463 << valuesPacked_wdv.extent (0) <<
".");
1464 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1465 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1466 std::logic_error, myPrefix <<
1467 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1468 <<
" != myGraph_->lclIndsPacked.extent(0) = "
1469 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Final checks on the packed row offsets against lclNumRows + 1 and the
// packed value / index extents.
// NOTE(review): the last message below prints the label
// "lclIndsPacked_wdvk_inds", which looks like two names fused together.
1475 const char myPrefix[] =
"After packing, ";
1476 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1478 (
size_t (rowPtrsPackedHost.extent (0)) !=
size_t (lclNumRows + 1),
1479 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1480 << rowPtrsPackedHost.extent (0) <<
" != lclNumRows+1 = " <<
1481 (lclNumRows+1) <<
".");
1482 if (rowPtrsPackedHost.extent (0) != 0) {
1483 const size_t numOffsets (rowPtrsPackedHost.extent (0));
1484 const size_t valToCheck = rowPtrsPackedHost(numOffsets-1);
1485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1486 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1487 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1488 (numOffsets-1) <<
") = " << valToCheck
1489 <<
" != valuesPacked_wdv.extent(0) = "
1490 << valuesPacked_wdv.extent (0) <<
".");
1491 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1492 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1493 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1494 (numOffsets-1) <<
") = " << valToCheck
1495 <<
" != myGraph_->lclIndsPacked_wdvk_inds.extent(0) = "
1496 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// "Optimize Storage" defaults to true unless the graph is static and not
// already storage-optimized. A non-null params list may override it.
1504 const bool defaultOptStorage =
1505 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1506 const bool requestOptimizedStorage =
1507 (! params.is_null () &&
1508 params->get (
"Optimize Storage", defaultOptStorage)) ||
1509 (params.is_null () && defaultOptStorage);
1514 if (requestOptimizedStorage) {
1519 std::ostringstream os;
1520 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1521 << myGraph_->k_numRowEntries_.extent(0) << endl;
1522 std::cerr << os.str();
// Replace the per-row entry counts with an empty view.
1525 myGraph_->k_numRowEntries_ = row_entries_type ();
// Keep only the packed data: the unpacked members now alias the packed ones.
1530 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1531 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1532 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1533 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1534 valuesUnpacked_wdv = valuesPacked_wdv;
1536 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1537 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1541 std::ostringstream os;
1542 os << *prefix <<
"User requested NOT to optimize storage"
1544 std::cerr << os.str();
// fillLocalMatrix: the counterpart of fillLocalGraphAndMatrix that reads
// the graph only through staticGraph_ and packs only the values.
// NOTE(review): the signature lines are not visible in this excerpt; the
// name is taken from the ProfilingRegion label and createPrefix call.
//
// What the visible code does:
//  - Reads the local entry count, allocation size and packed row pointers
//    from staticGraph_.
//  - Works out whether optimized storage is requested. It is turned off if
//    the "Optimize Storage" parameter is false, or if the graph itself is
//    not storage-optimized.
//  - If the entry count differs from the allocation size, allocates a
//    packed value array and copies the values with a pack functor.
//    Otherwise the packed values alias the unpacked values.
//  - If optimized storage is requested, the unpacked values then alias the
//    packed ones and storageStatus_ becomes STORAGE_1D_PACKED.
1549 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1554 using ::Tpetra::Details::ProfilingRegion;
1555 using Teuchos::ArrayRCP;
1556 using Teuchos::Array;
1557 using Teuchos::null;
1561 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1562 using non_const_row_map_type =
typename row_map_type::non_const_type;
1563 using values_type =
typename local_matrix_device_type::values_type;
1564 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1565 const size_t lclNumRows = getLocalNumRows();
// Prefix string used only for the diagnostic output written to std::cerr.
1568 std::unique_ptr<std::string> prefix;
1570 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1571 std::ostringstream os;
1572 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1573 std::cerr << os.str ();
// Sizes and row pointers come from the (possibly const) graph.
1585 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1586 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1587 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1589 row_map_type k_ptrs;
// Optimized storage is requested unless a non-null params list sets
// "Optimize Storage" to false.
1595 bool requestOptimizedStorage =
true;
1596 const bool default_OptimizeStorage =
1597 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1598 if (! params.is_null() &&
1599 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1600 requestOptimizedStorage =
false;
// If the graph is not storage-optimized, the matrix cannot be either: the
// request is reported and then dropped.
// NOTE(review): the macro name on the line before "(true, ..." is not
// visible in this excerpt; since execution continues to the assignment
// below, it presumably warns rather than throws -- confirm.
1607 if (! staticGraph_->isStorageOptimized () &&
1608 requestOptimizedStorage) {
1610 (
true, std::runtime_error,
"You requested optimized storage "
1611 "by setting the \"Optimize Storage\" flag to \"true\" in "
1612 "the ParameterList, or by virtue of default behavior. "
1613 "However, the associated CrsGraph was filled separately and "
1614 "requested not to optimize storage. Therefore, the "
1615 "CrsMatrix cannot optimize storage.");
1616 requestOptimizedStorage =
false;
// Entry count != allocation size: the value storage has unused slots and
// must be packed.
1641 if (nodeNumEntries != nodeNumAllocated) {
1643 std::ostringstream os;
1644 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1645 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1647 std::cerr << os.str();
1652 std::ostringstream os;
1653 os << *prefix <<
"Allocate packed row offsets: "
1654 << (lclNumRows+1) << endl;
1655 std::cerr << os.str();
1657 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
1662 size_t lclTotalNumEntries = 0;
1665 typename row_entries_type::const_type numRowEnt_h =
1666 staticGraph_->k_numRowEntries_;
// NOTE(review): the right-hand side that computes lclTotalNumEntries is not
// visible in this excerpt.
1668 lclTotalNumEntries =
1675 std::ostringstream os;
1676 os << *prefix <<
"Allocate packed values: "
1677 << lclTotalNumEntries << endl;
1678 std::cerr << os.str ();
1680 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
// Pack functor: reads the unpacked values together with tmpk_ptrs and
// k_rowPtrs, and writes into k_vals.
1684 typename values_type::non_const_type,
1685 typename values_type::const_type,
1686 typename row_map_type::non_const_type,
1687 typename row_map_type::const_type> valsPacker
1688 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1689 tmpk_ptrs, k_rowPtrs);
1692 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1693 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1694 range_type (0, lclNumRows), valsPacker);
// "Storage already packed" path: the packed values alias the unpacked values.
1698 valuesPacked_wdv = valuesUnpacked_wdv;
1700 std::ostringstream os;
1701 os << *prefix <<
"Storage already packed: "
1702 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1703 std::cerr << os.str();
// With optimized storage, only the packed values are kept.
1708 if (requestOptimizedStorage) {
1711 valuesUnpacked_wdv = valuesPacked_wdv;
1713 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Inserts new column indices into one row of `graph` and copies the
// matching values into that row's value storage.
// NOTE(review): the function name and leading parameters (graph, rowInfo)
// are not visible in this excerpt -- presumably insertIndicesAndValues;
// confirm against the class declaration.
//
// newInds:    indices to insert, passed on to graph.insertIndices.
// oldRowVals: the row's existing value storage (destination).
// newRowVals: the values to copy (source).
// lg, I:      forwarded unchanged to graph.insertIndices.
1717 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1722 const typename crs_graph_type::SLocalGlobalViews& newInds,
1723 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1724 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1725 const ELocalGlobal lg,
1726 const ELocalGlobal I)
// Record the entry count before the insert: new values are written
// starting at that position.
1728 const size_t oldNumEnt = rowInfo.numEntries;
1729 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
// Copy the first numInserted values of newRowVals to
// oldRowVals[oldNumEnt ...].
// NOTE(review): this is a raw byte copy, so it assumes impl_scalar_type
// is trivially copyable -- confirm.
1735 if (numInserted > 0) {
1736 const size_t startOffset = oldNumEnt;
1737 memcpy ((
void*) &oldRowVals[startOffset], &newRowVals[0],
1738 numInserted *
sizeof (impl_scalar_type));
// insertLocalValues (ArrayView overload): inserts values at the given
// local column indices of local row lclRow.
// NOTE(review): the leading parameter (lclRow) and the trailing one (CM,
// the combine mode) are used below but their declarations are not visible
// in this excerpt.
//
// Throws std::runtime_error if fill is not active, the graph is static,
// the graph has no column Map, values and indices differ in size, or
// lclRow is not owned by this process. Another runtime_error reports that
// the graph's indices are global; its condition is not visible here.
// Throws std::invalid_argument for a combine mode with no visible branch.
// In HAVE_TPETRA_DEBUG builds it also throws std::invalid_argument if any
// index is reported as not in the column Map.
1742 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1746 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1747 const Teuchos::ArrayView<const Scalar>& values,
1751 const char tfecfFuncName[] =
"insertLocalValues: ";
// Precondition checks.
1753 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1754 (! this->isFillActive (), std::runtime_error,
1755 "Fill is not active. After calling fillComplete, you must call "
1756 "resumeFill before you may insert entries into the matrix again.");
1757 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1758 (this->isStaticGraph (), std::runtime_error,
1759 "Cannot insert indices with static graph; use replaceLocalValues() "
1763 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1764 (graph.
colMap_.is_null (), std::runtime_error,
1765 "Cannot insert local indices without a column map.");
1766 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1768 std::runtime_error,
"Graph indices are global; use "
1769 "insertGlobalValues().");
1770 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1771 (values.size () != indices.size (), std::runtime_error,
1772 "values.size() = " << values.size ()
1773 <<
" != indices.size() = " << indices.size () <<
".");
1774 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1775 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1776 "Local row index " << lclRow <<
" does not belong to this process.");
// First insert: allocate the graph's indices and the matrix's values
// (local-index flavor).
1778 if (! graph.indicesAreAllocated ()) {
1782 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
// Debug builds only: collect indices into badColInds and report them all
// in a single exception.
// NOTE(review): the per-index membership test inside the loop is not
// visible in this excerpt.
1785 #ifdef HAVE_TPETRA_DEBUG
1786 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1791 using Teuchos::toString;
1794 Teuchos::Array<LocalOrdinal> badColInds;
1795 bool allInColMap =
true;
1796 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1798 allInColMap =
false;
1799 badColInds.push_back (indices[k]);
1802 if (! allInColMap) {
1803 std::ostringstream os;
1804 os <<
"You attempted to insert entries in owned row " << lclRow
1805 <<
", at the following column indices: " << toString (indices)
1807 os <<
"Of those, the following indices are not in the column Map on "
1808 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1809 "the matrix has a column Map already, it is invalid to insert "
1810 "entries at those locations.";
1811 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1812 (
true, std::invalid_argument, os.str ());
1815 #endif // HAVE_TPETRA_DEBUG
// Host view of this row's values. The callbacks below receive the input
// position k and the storage offset.
1819 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// Accumulating variant (+=).
// NOTE(review): the condition selecting this branch is not visible;
// presumably CM == ADD -- confirm.
1821 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1822 valsView[offset] += values[k]; };
1823 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1824 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1825 }
else if (CM ==
INSERT) {
1826 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1827 valsView[offset] = values[k]; };
1828 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1829 graph.insertLocalIndicesImpl(lclRow, indices, cb);
// Any other combine mode is rejected.
// NOTE(review): the message has no space between the combine-mode name and
// "but", so the two words run together in the output.
1831 std::ostringstream os;
1832 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1833 <<
"but this has not been implemented." << endl;
1834 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1835 (
true, std::invalid_argument, os.str ());
// insertLocalValues (raw-array overload): wraps the raw arrays in
// Teuchos::ArrayView objects of length numEnt and forwards to the
// ArrayView overload.
// NOTE(review): the leading parameter (localRow) and the trailing one (CM)
// are used below but their declarations are not visible in this excerpt.
//
// numEnt: number of entries in both vals and cols.
// vals:   values to insert.
// cols:   local column indices, one per value.
1839 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1843 const LocalOrdinal numEnt,
1844 const Scalar vals[],
1845 const LocalOrdinal cols[],
// Non-owning views over the caller's arrays.
1848 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1849 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1850 this->insertLocalValues (localRow, colsT, valsT, CM);
// insertGlobalValuesImpl: implementation helper that inserts numInputEnt
// values at global column indices gblColInds into the row described by
// rowInfo. The visible callback accumulates (+=) into the row's values.
// NOTE(review): the leading parameters (graph, rowInfo) and the values
// parameter (vals) are used below but their declarations are not visible
// in this excerpt.
//
// In HAVE_TPETRA_DEBUG builds, after the insert it cross-checks the row's
// entry count against the graph and throws std::logic_error, with a
// detailed dump of the row, if they disagree.
1853 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1858 const GlobalOrdinal gblColInds[],
1860 const size_t numInputEnt)
1862 #ifdef HAVE_TPETRA_DEBUG
1863 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
1865 const size_t curNumEnt = rowInfo.numEntries;
1866 #endif // HAVE_TPETRA_DEBUG
// First insert: allocate the graph's indices and the matrix's values
// (global-index flavor).
1868 if (! graph.indicesAreAllocated ()) {
1871 using ::Tpetra::Details::Behavior;
1872 const bool verbose = Behavior::verbose(
"CrsMatrix");
1873 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
// Fetch rowInfo again from the graph after the allocation above.
1878 rowInfo = graph.
getRowInfo (rowInfo.localRow);
// Callback: add input value k to the stored value at the given offset.
1881 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1882 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1883 valsView[offset] += vals[k];
1885 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1886 #ifdef HAVE_TPETRA_DEBUG
// Debug cross-check: the old entry count plus the number inserted must
// equal chkNewNumEnt. Its initializer is not visible; the message below
// labels it graph.getNumEntriesInLocalRow(...).
// NOTE(review): origNumEnt and numInserted are used below but their
// declarations are not visible in this excerpt.
1892 #ifdef HAVE_TPETRA_DEBUG
1893 size_t newNumEnt = curNumEnt + numInserted;
1894 const size_t chkNewNumEnt =
1896 if (chkNewNumEnt != newNumEnt) {
1897 std::ostringstream os;
1898 os << std::endl <<
"newNumEnt = " << newNumEnt
1899 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1900 <<
") = " << chkNewNumEnt <<
"." << std::endl
1901 <<
"\torigNumEnt: " << origNumEnt << std::endl
1902 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1903 <<
"\tgblColInds: [";
1904 for (
size_t k = 0; k < numInputEnt; ++k) {
1905 os << gblColInds[k];
1906 if (k +
size_t (1) < numInputEnt) {
1910 os <<
"]" << std::endl
1912 for (
size_t k = 0; k < numInputEnt; ++k) {
1914 if (k +
size_t (1) < numInputEnt) {
1918 os <<
"]" << std::endl;
// If row views are available, also print the row's current contents.
1920 if (this->supportsRowViews ()) {
1921 values_host_view_type vals2;
1922 if (this->isGloballyIndexed ()) {
1923 global_inds_host_view_type gblColInds2;
1924 const GlobalOrdinal gblRow =
1925 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1927 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1928 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
// getGlobalRowView may itself throw. If it does, record the failure
// in the message and skip the printing below.
1932 bool getViewThrew =
false;
1934 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1936 catch (std::exception& e) {
1937 getViewThrew =
true;
1938 os <<
"getGlobalRowView threw exception:" << std::endl
1939 << e.what () << std::endl;
1941 if (! getViewThrew) {
1942 os <<
"\tNew global column indices: ";
1943 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1944 os << gblColInds2[jjj] <<
" ";
1946 os <<
"\tNew values: ";
1947 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1948 os << vals2[jjj] <<
" ";
1953 else if (this->isLocallyIndexed ()) {
1954 local_inds_host_view_type lclColInds2;
1955 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1956 os <<
"\tNew local column indices: ";
1957 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1958 os << lclColInds2[jjj] <<
" ";
1960 os <<
"\tNew values: ";
1961 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1962 os << vals2[jjj] <<
" ";
1967 os <<
"Please report this bug to the Tpetra developers.";
1968 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1969 (
true, std::logic_error, os.str ());
1971 #endif // HAVE_TPETRA_DEBUG
// insertGlobalValues (ArrayView overload): inserts values at the given
// global column indices of global row gblRow.
// NOTE(review): the leading parameter (gblRow) and the definitions of
// lclRow, graph, rowInfo and IST are used below but are not visible in
// this excerpt.
//
// Visible behavior:
//  - If gblRow maps to an invalid local row, the data goes to
//    insertNonownedGlobalValues.
//  - Otherwise, a static graph is an error (std::runtime_error).
//  - If the graph has a column Map, indices reported as not in the column
//    Map cause std::invalid_argument (with the list of bad indices only in
//    HAVE_TPETRA_DEBUG builds).
//  - The insert itself is done by insertGlobalValuesImpl.
// The values/indices size check is compiled only in HAVE_TPETRA_DEBUG
// builds.
1974 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1978 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1979 const Teuchos::ArrayView<const Scalar>& values)
1981 using Teuchos::toString;
1984 typedef LocalOrdinal LO;
1985 typedef GlobalOrdinal GO;
1986 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
1987 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
1988 const char tfecfFuncName[] =
"insertGlobalValues: ";
1990 #ifdef HAVE_TPETRA_DEBUG
1991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1992 (values.size () != indices.size (), std::runtime_error,
1993 "values.size() = " << values.size () <<
" != indices.size() = "
1994 << indices.size () <<
".");
1995 #endif // HAVE_TPETRA_DEBUG
1999 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
// Row not owned by the calling process: hand off to the nonowned path.
2002 if (lclRow == OTLO::invalid ()) {
2009 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row but static graph: inserting new entries is not allowed.
2012 if (this->isStaticGraph ()) {
2014 const int myRank = rowMap.getComm ()->getRank ();
2015 const int numProcs = rowMap.getComm ()->getSize ();
2016 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2017 (
true, std::runtime_error,
2018 "The matrix was constructed with a constant (\"static\") graph, "
2019 "yet the given global row index " << gblRow <<
" is in the row "
2020 "Map on the calling process (with rank " << myRank <<
", of " <<
2021 numProcs <<
" process(es)). In this case, you may not insert "
2022 "new entries into rows owned by the calling process.");
// Raw pointers to the inputs. Scalar is reinterpreted as IST.
2026 const IST*
const inputVals =
2027 reinterpret_cast<const IST*
> (values.getRawPtr ());
2028 const GO*
const inputGblColInds = indices.getRawPtr ();
2029 const size_t numInputEnt = indices.size ();
// With a column Map present, check the input indices before inserting.
// NOTE(review): the per-index membership test inside the loop is not
// visible in this excerpt.
2038 if (! graph.
colMap_.is_null ()) {
2044 #ifdef HAVE_TPETRA_DEBUG
2045 Teuchos::Array<GO> badColInds;
2046 #endif // HAVE_TPETRA_DEBUG
2047 const size_type numEntriesToInsert = indices.size ();
2048 bool allInColMap =
true;
2049 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2051 allInColMap =
false;
2052 #ifdef HAVE_TPETRA_DEBUG
2053 badColInds.push_back (indices[k]);
2056 #endif // HAVE_TPETRA_DEBUG
2059 if (! allInColMap) {
2060 std::ostringstream os;
2061 os <<
"You attempted to insert entries in owned row " << gblRow
2062 <<
", at the following column indices: " << toString (indices)
2064 #ifdef HAVE_TPETRA_DEBUG
2065 os <<
"Of those, the following indices are not in the column Map "
2066 "on this process: " << toString (badColInds) <<
"." << endl
2067 <<
"Since the matrix has a column Map already, it is invalid "
2068 "to insert entries at those locations.";
2070 os <<
"At least one of those indices is not in the column Map "
2071 "on this process." << endl <<
"It is invalid to insert into "
2072 "columns not in the column Map on the process that owns the "
2074 #endif // HAVE_TPETRA_DEBUG
2075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2076 (
true, std::invalid_argument, os.str ());
// Do the actual insert.
2080 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2081 inputVals, numInputEnt);
// insertGlobalValues (raw-array overload): wraps the raw arrays in
// Teuchos::ArrayView objects of length numEnt and forwards to the
// ArrayView overload.
// NOTE(review): the leading parameter (globalRow) is used below but its
// declaration is not visible in this excerpt.
//
// numEnt: number of entries in both vals and inds.
// vals:   values to insert.
// inds:   global column indices, one per value.
2086 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2090 const LocalOrdinal numEnt,
2091 const Scalar vals[],
2092 const GlobalOrdinal inds[])
// Non-owning views over the caller's arrays.
2094 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2095 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2096 this->insertGlobalValues (globalRow, indsT, valsT);
// insertGlobalValuesFiltered: like insertGlobalValues, but when the graph
// has a column Map, input indices that are not in it are skipped instead
// of causing an error.
// NOTE(review): the trailing parameter(s) after `values` are not visible
// in this excerpt.
//
// Visible behavior:
//  - Throws std::runtime_error if values and indices differ in size.
//  - If gblRow maps to an invalid local row, the data goes to
//    insertNonownedGlobalValues.
//  - Otherwise, a static graph is an error (std::runtime_error).
//  - Column Map present and graph locally indexed: each run of valid
//    indices is converted to local indices and inserted with
//    insertLocalValues.
//  - Column Map present otherwise: each run of valid indices is inserted
//    with insertGlobalValuesImpl.
//  - Last visible call: insert everything with insertGlobalValuesImpl,
//    presumably when there is no column Map (the branch keyword is not
//    visible -- confirm).
2100 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2104 const GlobalOrdinal gblRow,
2105 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2106 const Teuchos::ArrayView<const Scalar>& values,
2109 typedef impl_scalar_type IST;
2110 typedef LocalOrdinal LO;
2111 typedef GlobalOrdinal GO;
2112 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2113 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
2116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117 (values.size () != indices.size (), std::runtime_error,
2118 "values.size() = " << values.size () <<
" != indices.size() = "
2119 << indices.size () <<
".");
2124 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2125 const LO lclRow = rowMap.getLocalElement (gblRow);
// Row not owned by the calling process: hand off to the nonowned path.
2126 if (lclRow == OTLO::invalid ()) {
2133 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row but static graph: inserting new entries is not allowed.
2136 if (this->isStaticGraph ()) {
2138 const int myRank = rowMap.getComm ()->getRank ();
2139 const int numProcs = rowMap.getComm ()->getSize ();
2140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2141 (
true, std::runtime_error,
2142 "The matrix was constructed with a constant (\"static\") graph, "
2143 "yet the given global row index " << gblRow <<
" is in the row "
2144 "Map on the calling process (with rank " << myRank <<
", of " <<
2145 numProcs <<
" process(es)). In this case, you may not insert "
2146 "new entries into rows owned by the calling process.");
2149 crs_graph_type& graph = * (this->myGraph_);
2150 const IST*
const inputVals =
2151 reinterpret_cast<const IST*
> (values.getRawPtr ());
2152 const GO*
const inputGblColInds = indices.getRawPtr ();
2153 const size_t numInputEnt = indices.size ();
2154 RowInfo rowInfo = graph.getRowInfo (lclRow);
// Case 1: column Map present and the graph is locally indexed. Global
// input indices are translated to local ones through the column Map.
2156 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2163 const map_type& colMap = * (graph.colMap_);
2164 size_t curOffset = 0;
2165 while (curOffset < numInputEnt) {
// Collect the local indices of the next run of inputs that are in the
// column Map.
// NOTE(review): the handling of an index that is NOT in the column Map
// is not visible here. The invariant check below implies the scan stops
// at the first such index -- confirm.
2169 Teuchos::Array<LO> lclIndices;
2170 size_t endOffset = curOffset;
2171 for ( ; endOffset < numInputEnt; ++endOffset) {
2172 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2173 if (lclIndex != OTLO::invalid())
2174 lclIndices.push_back(lclIndex);
2181 const LO numIndInSeq = (endOffset - curOffset);
2182 if (numIndInSeq != 0) {
2183 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
// The scan must have stopped either at the end of the input or at an
// index that is not in the column Map.
2189 const bool invariant = endOffset == numInputEnt ||
2190 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2191 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2192 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
// Step past the rejected index.
2194 curOffset = endOffset + 1;
// Case 2: column Map present but the graph is not locally indexed. Valid
// runs are inserted as global indices.
2197 else if (! graph.colMap_.is_null ()) {
2198 const map_type& colMap = * (graph.colMap_);
2199 size_t curOffset = 0;
2200 while (curOffset < numInputEnt) {
// Advance endOffset over the run of indices that are in the column Map.
2204 size_t endOffset = curOffset;
2205 for ( ; endOffset < numInputEnt &&
2206 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2212 const LO numIndInSeq = (endOffset - curOffset);
2213 if (numIndInSeq != 0) {
// Fetch rowInfo again from the graph before each insert.
2214 rowInfo = graph.getRowInfo(lclRow);
2215 this->insertGlobalValuesImpl (graph, rowInfo,
2216 inputGblColInds + curOffset,
2217 inputVals + curOffset,
2224 const bool invariant = endOffset == numInputEnt ||
2225 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2227 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2229 curOffset = endOffset + 1;
// Remaining case: insert all inputs unfiltered.
2233 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2234 inputVals, numInputEnt);
// insertGlobalValuesFilteredChecked: calls insertGlobalValuesFiltered. If
// that throws a std::exception, builds a diagnostic message and rethrows
// as std::runtime_error.
// NOTE(review): the trailing parameters after `prefix`, and the condition
// that selects between the two message variants, are not visible in this
// excerpt.
//
// gblRow:  global row index to insert into.
// indices: global column indices.
// values:  values, one per index.
// prefix:  C string intended to prefix the diagnostic message.
2239 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2241 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2242 insertGlobalValuesFilteredChecked(
2243 const GlobalOrdinal gblRow,
2244 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2245 const Teuchos::ArrayView<const Scalar>& values,
2246 const char*
const prefix,
2254 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2256 catch(std::exception& e) {
2257 std::ostringstream os;
2259 const size_t maxNumToPrint =
// NOTE(review): prefix is a `const char* const`, so `*prefix` streams only
// its FIRST character rather than the whole string. This was likely meant
// to be `os << prefix` -- confirm and fix.
2261 os << *prefix <<
": insertGlobalValuesFiltered threw an "
2262 "exception: " << e.what() << endl
2263 <<
"Global row index: " << gblRow << endl;
// NOTE(review): this shorter message does not include prefix at all, so it
// starts with ": ".
2271 os <<
": insertGlobalValuesFiltered threw an exception: "
2274 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// replaceLocalValuesImpl: for each of the numElts input local column
// indices, finds the matching entry in the row and overwrites its value.
// NOTE(review): the function name is taken from the call site in
// replaceLocalValues. The leading parameters (rowVals, graph, rowInfo) and
// newVals are used below but not declared in this excerpt. The branch
// conditions, the hint/counter updates and the final return are also not
// visible.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the second loop's
// path is taken and the graph has no column Map.
2278 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2284 const LocalOrdinal inds[],
2286 const LocalOrdinal numElts)
2288 typedef LocalOrdinal LO;
2289 typedef GlobalOrdinal GO;
// Passed to findRelOffset along with the search hint.
2290 const bool sorted = graph.
isSorted ();
// First path: the input local indices are searched for directly in the
// row's column indices.
2300 for (LO j = 0; j < numElts; ++j) {
2301 const LO lclColInd = inds[j];
2302 const size_t offset =
2303 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2304 lclColInd, hint, sorted);
// An offset equal to numEntries is treated as "not found"; anything else
// is the position to overwrite.
2305 if (offset != rowInfo.numEntries) {
2306 rowVals[offset] = newVals[j];
// Second path: a column Map is required.
// NOTE(review): the search key here is gblColInd, whose computation is
// not visible. Presumably the graph stores global indices on this path
// and each input is translated through the column Map -- confirm.
2313 if (graph.
colMap_.is_null ()) {
2314 return Teuchos::OrdinalTraits<LO>::invalid ();
2322 for (LO j = 0; j < numElts; ++j) {
2324 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2325 const size_t offset =
2326 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2327 gblColInd, hint, sorted);
2328 if (offset != rowInfo.numEntries) {
2329 rowVals[offset] = newVals[j];
// replaceLocalValues (ArrayView overload): checks that the two inputs have
// the same length, then forwards raw pointers to the raw-array overload.
// NOTE(review): the leading parameter (localRow) is used below but its
// declaration is not visible in this excerpt.
//
// lclCols: local column indices whose values should be replaced.
// vals:    replacement values, one per index.
// Returns Teuchos::OrdinalTraits<LO>::invalid() if the sizes differ;
// otherwise whatever the raw-array overload returns.
2348 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2352 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2353 const Teuchos::ArrayView<const Scalar>& vals)
2355 typedef LocalOrdinal LO;
// Both sizes are cast to LO before comparing.
2357 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
2358 if (static_cast<LO> (vals.size ()) != numInputEnt) {
2359 return Teuchos::OrdinalTraits<LO>::invalid ();
2361 const LO*
const inputInds = lclCols.getRawPtr ();
2362 const Scalar*
const inputVals = vals.getRawPtr ();
2363 return this->replaceLocalValues (localRow, numInputEnt,
2364 inputVals, inputInds);
// replaceLocalValues (Kokkos::View overload): checks that the two views
// have the same extent, then forwards their data pointers to the raw-array
// overload.
// NOTE(review): the leading parameter (localRow) and the LO typedef are
// used below but their declarations are not visible in this excerpt.
//
// inputInds: local column indices whose values should be replaced.
// inputVals: replacement values (impl_scalar_type), one per index.
// Returns Teuchos::OrdinalTraits<LO>::invalid() if the extents differ;
// otherwise whatever the raw-array overload returns.
2367 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2373 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2374 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2377 const LO numInputEnt = inputInds.extent(0);
2378 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2379 return Teuchos::OrdinalTraits<LO>::invalid();
// The raw-array overload takes Scalar, so the impl_scalar_type data is
// reinterpreted as Scalar.
2381 const Scalar*
const inVals =
2382 reinterpret_cast<const Scalar*
>(inputVals.data());
2383 return this->replaceLocalValues(localRow, numInputEnt,
2384 inVals, inputInds.data());
// Raw-pointer overload (presumably of replaceLocalValues; the name line is
// not visible in this excerpt).
//
// Returns the invalid LO value when fill is not active or the matrix has no
// static graph, and 0 when rowInfo.localRow is invalid.  Otherwise obtains a
// writable host view of the row's values and delegates to
// replaceLocalValuesImpl, returning that helper's result.
// (graph and rowInfo are set up on lines not shown here.)
2387 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2391 const LocalOrdinal numEnt,
2392 const Scalar inputVals[],
2393 const LocalOrdinal inputCols[])
2396 typedef LocalOrdinal LO;
2398 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2400 return Teuchos::OrdinalTraits<LO>::invalid ();
2405 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2408 return static_cast<LO
> (0);
2410 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The helper works on IST, so the Scalar input is reinterpreted.
2411 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2412 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2413 inputCols, inVals, numEnt);
// Row-level helper taking global column indices.  NOTE(review): the name
// line is not visible in this excerpt; presumably this is
// replaceGlobalValuesImpl, which the raw-pointer replaceGlobalValues
// overload calls.
//
// The visible code wraps inds in an ArrayView and builds a callback that
// assigns newVals[k] to rowVals[offset].  The callback's second argument is
// unnamed and unused.  How the callback is invoked is on lines not shown.
2416 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2422 const GlobalOrdinal inds[],
2424 const LocalOrdinal numElts)
2426 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
2428 [&](
size_t const k,
size_t const ,
size_t const offset) {
2429 rowVals[offset] = newVals[k];
// std::ref stores a reference to `fun` in the std::function instead of a copy.
2431 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// Teuchos::ArrayView overload (presumably of replaceGlobalValues; the name
// line is not visible in this excerpt).
//
// Returns the invalid LO value when the index and value arrays differ in
// length.  Otherwise forwards the raw pointers and entry count to the
// (globalRow, count, values, indices) overload of replaceGlobalValues.
2435 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2439 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2440 const Teuchos::ArrayView<const Scalar>& inputVals)
2442 typedef LocalOrdinal LO;
2444 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2445 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2446 return Teuchos::OrdinalTraits<LO>::invalid ();
2448 return this->replaceGlobalValues (globalRow, numInputEnt,
2449 inputVals.getRawPtr (),
2450 inputGblColInds.getRawPtr ());
// Raw-pointer overload (presumably of replaceGlobalValues; the name line is
// not visible in this excerpt).
//
// Returns the invalid LO value when fill is not active or the matrix has no
// static graph, and 0 when rowInfo.localRow is invalid.  Otherwise obtains a
// writable host view of the row's values and delegates to
// replaceGlobalValuesImpl, returning that helper's result.
// (graph and rowInfo are set up on lines not shown here.)
2453 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2457 const LocalOrdinal numEnt,
2458 const Scalar inputVals[],
2459 const GlobalOrdinal inputGblColInds[])
2462 typedef LocalOrdinal LO;
2464 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2466 return Teuchos::OrdinalTraits<LO>::invalid ();
2471 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2474 return static_cast<LO
> (0);
2477 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The helper works on IST, so the Scalar input is reinterpreted.
2478 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2479 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2480 inputGblColInds, inVals, numEnt);
// Kokkos::View overload (presumably of replaceGlobalValues; the name line is
// not visible in this excerpt).
//
// Returns the invalid LO value when inputInds and inputVals have different
// extents.  Otherwise reinterprets the impl_scalar_type data as Scalar and
// forwards to the pointer-based replaceGlobalValues overload (the tail of
// that call is not shown here).
2483 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2489 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2490 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2499 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
2500 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2501 return Teuchos::OrdinalTraits<LO>::invalid();
2503 const Scalar*
const inVals =
2504 reinterpret_cast<const Scalar*
>(inputVals.data());
2505 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
// Row-level helper that adds input values into existing entries of one row,
// given global column indices.  NOTE(review): the name line is not visible
// in this excerpt; presumably this is sumIntoGlobalValuesImpl, which the
// raw-pointer sumIntoGlobalValues overload calls.
//
// Each input index is located with KokkosSparse::findRelOffset and the value
// is accumulated only when the entry is found.  Both a Kokkos::atomic_add
// and a plain += update are visible; the condition choosing between them is
// on lines not shown (presumably the caller's `atomic` flag -- confirm).
2509 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2515 const GlobalOrdinal inds[],
2517 const LocalOrdinal numElts,
2520 typedef LocalOrdinal LO;
2521 typedef GlobalOrdinal GO;
2523 const bool sorted = graph.
isSorted ();
2532 if (graph.
colMap_.is_null ()) {
2543 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
// First visible loop: the search uses a local column index (lclColInd,
// computed on a line not shown); invalid local indices are skipped.
2545 for (LO j = 0; j < numElts; ++j) {
2547 if (lclColInd != LINV) {
2548 const size_t offset =
2549 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2550 lclColInd, hint, sorted);
2551 if (offset != rowInfo.numEntries) {
2553 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2556 rowVals[offset] += newVals[j];
// Second visible loop: the input global indices are searched for directly.
2569 for (LO j = 0; j < numElts; ++j) {
2570 const GO gblColInd = inds[j];
2571 const size_t offset =
2572 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2573 gblColInd, hint, sorted);
2574 if (offset != rowInfo.numEntries) {
2576 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2579 rowVals[offset] += newVals[j];
// Teuchos::ArrayView overload (presumably of sumIntoGlobalValues; the name
// line is not visible in this excerpt).
//
// Returns the invalid LO value when the index and value arrays differ in
// length.  Otherwise forwards the raw pointers and entry count to the
// pointer-based sumIntoGlobalValues overload (the trailing argument of that
// call is not shown here).
2593 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2597 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2598 const Teuchos::ArrayView<const Scalar>& inputVals,
2601 typedef LocalOrdinal LO;
2603 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2604 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2605 return Teuchos::OrdinalTraits<LO>::invalid ();
2607 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2608 inputVals.getRawPtr (),
2609 inputGblColInds.getRawPtr (),
// Raw-pointer overload (presumably of sumIntoGlobalValues; the name line is
// not visible in this excerpt).
//
// Returns the invalid LO value when fill is not active or the matrix has no
// static graph.  When rowInfo.localRow is invalid, the inputs are wrapped in
// ArrayViews and handed to insertNonownedGlobalValues.  Otherwise the call
// delegates to sumIntoGlobalValuesImpl on a writable host view of the row.
// (graph and rowInfo are set up on lines not shown here.)
2613 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2617 const LocalOrdinal numInputEnt,
2618 const Scalar inputVals[],
2619 const GlobalOrdinal inputGblColInds[],
2623 typedef LocalOrdinal LO;
2624 typedef GlobalOrdinal GO;
2626 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2628 return Teuchos::OrdinalTraits<LO>::invalid ();
2633 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
// A null pointer is passed to ArrayView when there are zero entries.
2638 using Teuchos::ArrayView;
2639 ArrayView<const GO> inputGblColInds_av(
2640 numInputEnt == 0 ?
nullptr : inputGblColInds,
2642 ArrayView<const Scalar> inputVals_av(
2643 numInputEnt == 0 ?
nullptr :
2644 inputVals, numInputEnt);
2649 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2660 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The helper works on IST, so the Scalar input is reinterpreted.
2661 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2662 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2663 inputGblColInds, inVals,
2664 numInputEnt, atomic);
// Public entry point (presumably transformLocalValues; the name line is not
// visible in this excerpt) that applies the binary function f to entries of
// local row lclRow, given local column indices.
//
// Returns the invalid LO value when fill is not active or the matrix has no
// static graph, and 0 when the graph reports an invalid local row.
// Otherwise delegates to the row-level transformLocalValues overload on a
// writable host view of the row's values and returns its result.
2668 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2672 const LocalOrdinal numInputEnt,
2673 const impl_scalar_type inputVals[],
2674 const LocalOrdinal inputCols[],
2675 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2678 using Tpetra::Details::OrdinalTraits;
2679 typedef LocalOrdinal LO;
2681 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2683 return Teuchos::OrdinalTraits<LO>::invalid ();
2685 const crs_graph_type& graph = * (this->staticGraph_);
2686 const RowInfo rowInfo = graph.getRowInfo (lclRow);
2688 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2691 return static_cast<LO
> (0);
2693 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2694 return this->transformLocalValues (curRowVals.data (), graph,
2695 rowInfo, inputCols, inputVals,
2696 numInputEnt, f, atomic);
// Public entry point that applies the binary function f to entries of
// global row gblRow, given global column indices.
//
// Returns the invalid LO value when fill is not active or the matrix has no
// static graph, and 0 when the graph reports an invalid local row for
// gblRow.  Otherwise delegates to the row-level transformGlobalValues
// overload on a writable host view of the row's values and returns its
// result.
2699 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2701 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2702 transformGlobalValues (
const GlobalOrdinal gblRow,
2703 const LocalOrdinal numInputEnt,
2704 const impl_scalar_type inputVals[],
2705 const GlobalOrdinal inputCols[],
2706 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2709 using Tpetra::Details::OrdinalTraits;
2710 typedef LocalOrdinal LO;
2712 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2714 return OrdinalTraits<LO>::invalid ();
2716 const crs_graph_type& graph = * (this->staticGraph_);
2717 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2719 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2722 return static_cast<LO
> (0);
2724 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2725 return this->transformGlobalValues (curRowVals.data (), graph,
2726 rowInfo, inputCols, inputVals,
2727 numInputEnt, f, atomic);
// Row-level worker: for each input local column index, locates the matching
// entry in the row and replaces it with f(current value, newVals[j]).
//
// rowVals  - values of the row being modified.
// graph    - graph supplying the row's column indices and sortedness.
// rowInfo  - row descriptor; rowInfo.numEntries bounds the search.
// inds     - numElts local column indices.
// newVals  - numElts input values, parallel to inds.
// f        - binary function combining the existing and the input value.
//
// Entries not found by KokkosSparse::findRelOffset are skipped.  Both an
// update through atomic_binary_function_update and a plain assignment are
// visible; the condition choosing between them is on lines not shown
// (presumably an `atomic` flag -- confirm).
2730 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2732 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2733 transformLocalValues (impl_scalar_type rowVals[],
2734 const crs_graph_type& graph,
2735 const RowInfo& rowInfo,
2736 const LocalOrdinal inds[],
2737 const impl_scalar_type newVals[],
2738 const LocalOrdinal numElts,
2739 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2742 typedef impl_scalar_type ST;
2743 typedef LocalOrdinal LO;
2744 typedef GlobalOrdinal GO;
2751 const bool sorted = graph.isSorted ();
// Locally indexed graph: the input indices are searched for directly.
2756 if (graph.isLocallyIndexed ()) {
2759 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2761 for (LO j = 0; j < numElts; ++j) {
2762 const LO lclColInd = inds[j];
2763 const size_t offset =
2764 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2765 lclColInd, hint, sorted);
2766 if (offset != rowInfo.numEntries) {
2775 volatile ST*
const dest = &rowVals[offset];
2776 (void) atomic_binary_function_update (dest, newVals[j], f);
2780 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Globally indexed graph: each local input index is converted to a global
// index through the column Map before searching.
2787 else if (graph.isGloballyIndexed ()) {
2791 if (graph.colMap_.is_null ()) {
2798 const map_type& colMap = * (graph.colMap_);
2801 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2803 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2804 for (LO j = 0; j < numElts; ++j) {
2805 const GO gblColInd = colMap.getGlobalElement (inds[j]);
// Indices the column Map cannot convert are skipped.
2806 if (gblColInd != GINV) {
2807 const size_t offset =
2808 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2809 gblColInd, hint, sorted);
2810 if (offset != rowInfo.numEntries) {
2819 volatile ST*
const dest = &rowVals[offset];
2820 (void) atomic_binary_function_update (dest, newVals[j], f);
2824 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Row-level worker: for each input global column index, locates the matching
// entry in the row and replaces it with f(current value, newVals[j]).
//
// rowVals  - values of the row being modified.
// graph    - graph supplying the row's column indices and sortedness.
// rowInfo  - row descriptor; rowInfo.numEntries bounds the search.
// inds     - numElts global column indices.
// newVals  - numElts input values, parallel to inds.
// f        - binary function combining the existing and the input value.
//
// Entries not found by KokkosSparse::findRelOffset are skipped.  Both an
// update through atomic_binary_function_update and a plain assignment are
// visible; the condition choosing between them is on lines not shown
// (presumably an `atomic` flag -- confirm).
2839 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2841 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2842 transformGlobalValues (impl_scalar_type rowVals[],
2843 const crs_graph_type& graph,
2844 const RowInfo& rowInfo,
2845 const GlobalOrdinal inds[],
2846 const impl_scalar_type newVals[],
2847 const LocalOrdinal numElts,
2848 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2851 typedef impl_scalar_type ST;
2852 typedef LocalOrdinal LO;
2853 typedef GlobalOrdinal GO;
2860 const bool sorted = graph.isSorted ();
// Globally indexed graph: the input indices are searched for directly.
2865 if (graph.isGloballyIndexed ()) {
2868 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2870 for (LO j = 0; j < numElts; ++j) {
2871 const GO gblColInd = inds[j];
2872 const size_t offset =
2873 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2874 gblColInd, hint, sorted);
2875 if (offset != rowInfo.numEntries) {
2884 volatile ST*
const dest = &rowVals[offset];
2885 (void) atomic_binary_function_update (dest, newVals[j], f);
2889 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Locally indexed graph: each global input index is converted to a local
// index through the column Map before searching.
2896 else if (graph.isLocallyIndexed ()) {
2900 if (graph.colMap_.is_null ()) {
2906 const map_type& colMap = * (graph.colMap_);
2909 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2911 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2912 for (LO j = 0; j < numElts; ++j) {
2913 const LO lclColInd = colMap.getLocalElement (inds[j]);
// Indices the column Map cannot convert are skipped.
2914 if (lclColInd != LINV) {
2915 const size_t offset =
2916 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2917 lclColInd, hint, sorted);
2918 if (offset != rowInfo.numEntries) {
2927 volatile ST*
const dest = &rowVals[offset];
2928 (void) atomic_binary_function_update (dest, newVals[j], f);
2932 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Row-level helper that adds input values into existing entries of one row,
// given local column indices.  NOTE(review): the name line is not visible in
// this excerpt; presumably this is sumIntoLocalValuesImpl, which the
// raw-pointer sumIntoLocalValues overload calls.
//
// Each input index is located with KokkosSparse::findRelOffset and the value
// is accumulated only when the entry is found.  Both a Kokkos::atomic_add
// and a plain += update are visible; the condition choosing between them is
// on lines not shown (presumably the caller's `atomic` flag -- confirm).
2947 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2953 const LocalOrdinal inds[],
2955 const LocalOrdinal numElts,
2958 typedef LocalOrdinal LO;
2959 typedef GlobalOrdinal GO;
2961 const bool sorted = graph.
isSorted ();
// First visible loop: the input local indices are searched for directly.
2971 for (LO j = 0; j < numElts; ++j) {
2972 const LO lclColInd = inds[j];
2973 const size_t offset =
2974 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2975 lclColInd, hint, sorted);
2976 if (offset != rowInfo.numEntries) {
2978 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2981 rowVals[offset] += newVals[j];
// Without a column Map the function returns the invalid LO value.
2989 if (graph.
colMap_.is_null ()) {
2990 return Teuchos::OrdinalTraits<LO>::invalid ();
// Second visible loop: the search uses a global column index (gblColInd,
// computed on a line not shown); invalid global indices are skipped.
2998 for (LO j = 0; j < numElts; ++j) {
3000 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3001 const size_t offset =
3002 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3003 gblColInd, hint, sorted);
3004 if (offset != rowInfo.numEntries) {
3006 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3009 rowVals[offset] += newVals[j];
// Teuchos::ArrayView overload (presumably of sumIntoLocalValues; the name
// line is not visible in this excerpt).
//
// Returns the invalid LO value when indices and values differ in length.
// Otherwise forwards the raw pointers, the entry count and the `atomic`
// argument to the pointer-based sumIntoLocalValues overload.
3029 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3033 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3034 const Teuchos::ArrayView<const Scalar>& values,
3038 const LO numInputEnt =
static_cast<LO
>(indices.size());
3039 if (static_cast<LO>(values.size()) != numInputEnt) {
3040 return Teuchos::OrdinalTraits<LO>::invalid();
3042 const LO*
const inputInds = indices.getRawPtr();
3043 const scalar_type*
const inputVals = values.getRawPtr();
3044 return this->sumIntoLocalValues(localRow, numInputEnt,
3045 inputVals, inputInds, atomic);
// Kokkos::View overload (presumably of sumIntoLocalValues; the name line is
// not visible in this excerpt).
//
// Returns the invalid LO value when inputInds and inputVals have different
// extents.  Otherwise reinterprets the impl_scalar_type data as scalar_type
// and forwards to the pointer-based sumIntoLocalValues overload.
// (The declaration receiving the reinterpret_cast result is on a line not
// shown here.)
3048 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3054 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3055 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3059 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
3060 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3061 return Teuchos::OrdinalTraits<LO>::invalid();
3064 reinterpret_cast<const scalar_type*
>(inputVals.data());
3065 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3066 inputInds.data(), atomic);
// Raw-pointer overload (presumably of sumIntoLocalValues; the name line is
// not visible in this excerpt).
//
// Returns the invalid LO value when fill is not active or the matrix has no
// static graph, and 0 when rowInfo.localRow is invalid.  Otherwise obtains a
// writable host view of the row's values and delegates to
// sumIntoLocalValuesImpl, returning that helper's result.
// (graph and rowInfo are set up on lines not shown here.)
3069 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3073 const LocalOrdinal numEnt,
3074 const Scalar vals[],
3075 const LocalOrdinal cols[],
3079 typedef LocalOrdinal LO;
3081 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3083 return Teuchos::OrdinalTraits<LO>::invalid ();
3088 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3091 return static_cast<LO
> (0);
3093 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The helper works on IST, so the Scalar input is reinterpreted.
3094 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3095 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3096 cols, inputVals, numEnt, atomic);
// Returns a read-only host view of one row's values (presumably
// getValuesViewHost; the name line is not visible in this excerpt).
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty.  Otherwise a host subview starting at
// rowinfo.offset1D is returned (remaining arguments are not shown here).
3099 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3101 values_dualv_type::t_host::const_type
3105 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3106 return typename values_dualv_type::t_host::const_type ();
3108 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a writable host view of one row's values (presumably
// getValuesViewHostNonConst; the name line is not visible in this excerpt).
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty.  Otherwise a host subview starting at
// rowinfo.offset1D is returned (remaining arguments are not shown here).
3113 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3115 values_dualv_type::t_host
3119 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3120 return typename values_dualv_type::t_host ();
3122 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a read-only device view of one row's values (presumably
// getValuesViewDevice; the name line is not visible in this excerpt).
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty.  Otherwise a device subview starting at
// rowinfo.offset1D is returned (remaining arguments are not shown here).
3127 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3129 values_dualv_type::t_dev::const_type
3133 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3134 return typename values_dualv_type::t_dev::const_type ();
3136 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Returns a writable device view of one row's values (presumably
// getValuesViewDeviceNonConst; the name line is not visible in this excerpt).
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty.  Otherwise a device subview starting at
// rowinfo.offset1D is returned (remaining arguments are not shown here).
3141 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3143 values_dualv_type::t_dev
3147 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3148 return typename values_dualv_type::t_dev ();
3150 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// getLocalRowCopy: copies the values of local row `localRow` and their local
// column indices into caller-provided host views.
//
// indices    - output view for column indices; needs at least as many
//              elements as the row has entries.
// values     - output view for values; same size requirement.
// numEntries - set to the number of entries in the row.
//
// Throws std::runtime_error when the matrix has no column Map, or when
// either output view is smaller than the row's entry count.
3156 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3160 nonconst_local_inds_host_view_type &indices,
3161 nonconst_values_host_view_type &values,
3162 size_t& numEntries)
const
3164 using Teuchos::ArrayView;
3165 using Teuchos::av_reinterpret_cast;
3166 const char tfecfFuncName[] =
"getLocalRowCopy: ";
3168 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3169 (! this->hasColMap (), std::runtime_error,
3170 "The matrix does not have a column Map yet. This means we don't have "
3171 "local indices for columns yet, so it doesn't make sense to call this "
3172 "method. If the matrix doesn't have a column Map yet, you should call "
3173 "fillComplete on it first.");
3175 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3176 const size_t theNumEntries = rowinfo.numEntries;
3177 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3178 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3179 static_cast<size_t> (values.size ()) < theNumEntries,
3180 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3181 theNumEntries <<
" entry/ies, but indices.size() = " <<
3182 indices.size () <<
" and values.size() = " << values.size () <<
".");
3183 numEntries = theNumEntries;
// Copying happens only when the graph reports a valid local row.
3185 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3186 if (staticGraph_->isLocallyIndexed ()) {
3187 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3188 auto curVals = getValuesViewHost(rowinfo);
3190 for (
size_t j = 0; j < theNumEntries; ++j) {
3191 values[j] = curVals[j];
3192 indices[j] = curLclInds(j);
// Globally indexed storage: the column Map is fetched here; the index
// conversion itself is on a line not shown in this excerpt.
3195 else if (staticGraph_->isGloballyIndexed ()) {
3197 const map_type& colMap = * (staticGraph_->colMap_);
3198 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3199 auto curVals = getValuesViewHost(rowinfo);
3201 for (
size_t j = 0; j < theNumEntries; ++j) {
3202 values[j] = curVals[j];
// getGlobalRowCopy: copies the values of global row `globalRow` and their
// global column indices into caller-provided host views.
//
// indices    - output view for column indices; needs at least as many
//              elements as the row has entries.
// values     - output view for values; same size requirement.
// numEntries - set to the number of entries in the row.
//
// Throws std::runtime_error when either output view is smaller than the
// row's entry count.
3210 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3214 nonconst_global_inds_host_view_type &indices,
3215 nonconst_values_host_view_type &values,
3216 size_t& numEntries)
const
3218 using Teuchos::ArrayView;
3219 using Teuchos::av_reinterpret_cast;
3220 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
3223 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3224 const size_t theNumEntries = rowinfo.numEntries;
3225 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3226 static_cast<size_t> (indices.size ()) < theNumEntries ||
3227 static_cast<size_t> (values.size ()) < theNumEntries,
3228 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3229 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3230 indices.size () <<
" and values.size() = " << values.size () <<
".");
3231 numEntries = theNumEntries;
// Copying happens only when the graph reports a valid local row.
3233 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed storage: the column Map is fetched here; the index
// conversion itself is on a line not shown in this excerpt.
3234 if (staticGraph_->isLocallyIndexed ()) {
3235 const map_type& colMap = * (staticGraph_->colMap_);
3236 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3237 auto curVals = getValuesViewHost(rowinfo);
3239 for (
size_t j = 0; j < theNumEntries; ++j) {
3240 values[j] = curVals[j];
3244 else if (staticGraph_->isGloballyIndexed ()) {
3245 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3246 auto curVals = getValuesViewHost(rowinfo);
3248 for (
size_t j = 0; j < theNumEntries; ++j) {
3249 values[j] = curVals[j];
3250 indices[j] = curGblInds(j);
// getLocalRowView: sets `indices` and `values` to host views of local row
// `localRow` (local column indices and values).
//
// Throws std::runtime_error when the matrix is globally indexed.
// When the row is valid and has entries, the outputs are host subviews of
// the graph's unpacked local indices and of the unpacked values; the two
// empty-view assignments presumably form the else branch (its keyword line
// is not shown here).
// With HAVE_TPETRA_DEBUG defined, consistency checks on the resulting sizes
// throw std::logic_error on failure.
3257 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3261 local_inds_host_view_type &indices,
3262 values_host_view_type &values)
const
3264 const char tfecfFuncName[] =
"getLocalRowView: ";
3266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3267 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3268 "its indices as global indices, so you cannot get a view with local "
3269 "column indices. If the matrix has a column Map, you may call "
3270 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3271 "a view with global column indices by calling getGlobalRowCopy().");
3273 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
3274 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3275 rowInfo.numEntries > 0) {
3276 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3280 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3287 indices = local_inds_host_view_type();
3288 values = values_host_view_type();
// Debug-only postconditions: indices/values sizes agree with each other and
// with the row's entry count as reported by the graph and by the matrix.
3291 #ifdef HAVE_TPETRA_DEBUG
3292 const char suffix[] =
". This should never happen. Please report this "
3293 "bug to the Tpetra developers.";
3294 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3295 (static_cast<size_t> (indices.size ()) !=
3296 static_cast<size_t> (values.size ()), std::logic_error,
3297 "At the end of this method, for local row " << localRow <<
", "
3298 "indices.size() = " << indices.size () <<
" != values.size () = "
3299 << values.size () << suffix);
3300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3301 (static_cast<size_t> (indices.size ()) !=
3302 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3303 "At the end of this method, for local row " << localRow <<
", "
3304 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3305 << rowInfo.numEntries << suffix);
3306 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3307 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3308 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3309 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3310 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3311 expectedNumEntries << suffix);
3312 #endif // HAVE_TPETRA_DEBUG
// getGlobalRowView: sets `indices` and `values` to host views of global row
// `globalRow` (global column indices and values).
//
// Throws std::runtime_error when the matrix is locally indexed.
// When the row is valid and has entries, the outputs are host subviews of
// the graph's global indices and of the unpacked values; the two empty-view
// assignments presumably form the else branch (its keyword line is not
// shown here).
// With HAVE_TPETRA_DEBUG defined, consistency checks on the resulting sizes
// throw std::logic_error on failure.
3316 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3320 global_inds_host_view_type &indices,
3321 values_host_view_type &values)
const
3323 const char tfecfFuncName[] =
"getGlobalRowView: ";
3325 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3326 isLocallyIndexed (), std::runtime_error,
3327 "The matrix is locally indexed, so we cannot return a view of the row "
3328 "with global column indices. Use getGlobalRowCopy() instead.");
3333 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3334 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3335 rowInfo.numEntries > 0) {
3336 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3339 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3344 indices = global_inds_host_view_type();
3345 values = values_host_view_type();
// Debug-only postconditions: indices/values sizes agree with each other and
// with the row's entry count as reported by the graph and by the matrix.
3348 #ifdef HAVE_TPETRA_DEBUG
3349 const char suffix[] =
". This should never happen. Please report this "
3350 "bug to the Tpetra developers.";
3351 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3352 (static_cast<size_t> (indices.size ()) !=
3353 static_cast<size_t> (values.size ()), std::logic_error,
3354 "At the end of this method, for global row " << globalRow <<
", "
3355 "indices.size() = " << indices.size () <<
" != values.size () = "
3356 << values.size () << suffix);
3357 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3358 (static_cast<size_t> (indices.size ()) !=
3359 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3360 "At the end of this method, for global row " << globalRow <<
", "
3361 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3362 << rowInfo.numEntries << suffix);
3363 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3364 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3365 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3366 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3367 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3368 " " << expectedNumEntries << suffix);
3369 #endif // HAVE_TPETRA_DEBUG
// Scales the stored values by theAlpha (presumably CrsMatrix::scale; the
// name line is not visible in this excerpt).
// The visible guard tests for unallocated indices, zero local rows, or zero
// local entries; what that branch does is not shown.  Scaling is done with
// KokkosBlas::scal on a read-write device view of the packed values,
// using the same view as output and input.
3373 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3380 const size_t nlrs = staticGraph_->getLocalNumRows ();
3381 const size_t numEntries = staticGraph_->getLocalNumEntries ();
3382 if (! staticGraph_->indicesAreAllocated () ||
3383 nlrs == 0 || numEntries == 0) {
3388 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3389 KokkosBlas::scal(vals, theAlpha, vals);
// Presumably CrsMatrix::setAllToScalar (name taken from the fence label; the
// name line itself is not visible in this excerpt).
// The visible guard tests for unallocated indices or zero local entries;
// what that branch does, and the fill itself, are on lines not shown.
// A Kokkos::fence labelled "CrsMatrix::setAllToScalar" is issued.
3394 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3405 const size_t numEntries = staticGraph_->getLocalNumEntries();
3406 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3414 Kokkos::fence(
"CrsMatrix::setAllToScalar");
// setAllValues (device-view overload): installs row pointers, column
// indices and values supplied as Kokkos views.
//
// rowPointers   - row offsets passed on to the graph.
// columnIndices - column indices passed on to the graph.
// values        - matrix values; must have the same size as columnIndices.
//
// Throws std::invalid_argument when columnIndices and values differ in
// size, std::runtime_error when myGraph_ is null or when
// myGraph_->setAllIndices throws, and std::logic_error when the resulting
// local graph's extents do not match the inputs.
3418 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3421 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3422 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3423 const typename local_matrix_device_type::values_type& values)
3426 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3427 const char tfecfFuncName[] =
"setAllValues: ";
3428 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3429 (columnIndices.size () != values.size (), std::invalid_argument,
3430 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3431 " = " << values.size () <<
".");
3432 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3433 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
// Any std::exception from the graph is re-reported as std::runtime_error
// with this method's name prefixed.
3436 myGraph_->setAllIndices (rowPointers, columnIndices);
3438 catch (std::exception &e) {
3439 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3440 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3441 "exception: " << e.what ());
// Sanity check that the graph's local structure matches the inputs.
3448 auto lclGraph = myGraph_->getLocalGraphDevice ();
3449 const size_t numEnt = lclGraph.entries.extent (0);
3450 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3451 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3452 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3453 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3454 "local graph. Please report this bug to the Tpetra developers.");
// The unpacked values are set to the packed values, and the storage status
// is marked as 1-D packed.
3457 valuesUnpacked_wdv = valuesPacked_wdv;
3461 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3463 checkInternalState ();
// setAllValues overload taking a local device matrix (the signature line is
// not visible in this excerpt; the profiling label reads "setAllValues from
// KokkosSparse::CrsMatrix").
// Extracts the row map, entries and values from localDeviceMatrix and
// forwards them to the three-view setAllValues overload.
3466 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3472 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3474 auto graph = localDeviceMatrix.graph;
3477 auto rows = graph.row_map;
3478 auto columns = graph.entries;
3479 auto values = localDeviceMatrix.values;
3481 setAllValues(rows,columns,values);
// setAllValues overload taking Teuchos::ArrayRCP inputs (row offsets `ptr`,
// column indices `ind`, values `val`).
//
// Allocates a native row-map view (ptrNative) of ptr.size() elements and
// wraps ptr's storage in an unmanaged size_t view (ptrSizeT); the transfer
// from ptrSizeT into ptrNative is on lines not shown here.  The indices and
// values are deep-copied into Kokkos views (values reinterpreted as IST)
// and everything is forwarded to the Kokkos-view setAllValues overload.
//
// Throws std::logic_error if the two row-pointer views differ in extent.
3484 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3488 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3489 const Teuchos::ArrayRCP<Scalar>& val)
3491 using Kokkos::Compat::getKokkosViewDeepCopy;
3492 using Teuchos::ArrayRCP;
3493 using Teuchos::av_reinterpret_cast;
3496 typedef typename local_graph_device_type::row_map_type row_map_type;
3498 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3504 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3505 Kokkos::View<
const size_t*,
3506 typename row_map_type::array_layout,
3508 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3511 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3512 (ptrNative.extent (0) != ptrSizeT.extent (0),
3513 std::logic_error,
"ptrNative.extent(0) = " <<
3514 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3515 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3516 "Tpetra developers.");
3518 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3519 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3520 this->setAllValues (ptrNative, indIn, valIn);
// getLocalDiagOffsets: fills `offsets` with per-row diagonal offsets
// obtained from the static graph.
//
// Throws std::runtime_error when the matrix has no graph.  `offsets` is
// resized to the local row count if it is smaller.
// When memory_space is Kokkos::HostSpace, the graph writes through an
// unmanaged view over offsets' storage.  Otherwise the graph fills a
// temporary device view and an unmanaged host view over offsets is created;
// the copy between them is on a line not shown here.
3523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3528 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3530 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
3537 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
3538 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3539 offsets.resize (lclNumRows);
3545 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3550 Kokkos::MemoryUnmanaged> output_type;
3551 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3552 staticGraph_->getLocalDiagOffsets (offsetsOut);
3555 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3556 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3557 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3558 Kokkos::MemoryUnmanaged> output_type;
3559 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
// getLocalDiagCopy (one-argument form, per the error prefix): copies the
// matrix diagonal into the vector `diag`.
//
// Throws std::runtime_error when the matrix has no graph or no column Map.
// A null row Map or null communicator is tested for; what that branch does
// is not shown in this excerpt.  With HAVE_TPETRA_DEBUG defined, also
// throws std::runtime_error if diag's Map is not compatible with the row
// Map.  The visible fill-complete path takes a 1-D subview of the local
// vector data over rows [0, myNumRows) and uses the local row Map and the
// local device matrix; the call consuming them is only partly visible.
3565 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3570 using Teuchos::ArrayRCP;
3571 using Teuchos::ArrayView;
3572 using Teuchos::av_reinterpret_cast;
3573 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3578 staticGraph_.is_null (), std::runtime_error,
3579 "This method requires that the matrix have a graph.");
3580 auto rowMapPtr = this->getRowMap ();
3581 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3587 auto colMapPtr = this->getColMap ();
3588 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3589 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3590 "This method requires that the matrix have a column Map.");
3591 const map_type& rowMap = * rowMapPtr;
3592 const map_type& colMap = * colMapPtr;
3593 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3595 #ifdef HAVE_TPETRA_DEBUG
3598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3599 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3600 "The input Vector's Map must be compatible with the CrsMatrix's row "
3601 "Map. You may check this by using Map's isCompatible method: "
3602 "diag.getMap ()->isCompatible (A.getRowMap ());");
3603 #endif // HAVE_TPETRA_DEBUG
3605 if (this->isFillComplete ()) {
3608 const auto D_lcl_1d =
3609 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3611 const auto lclRowMap = rowMap.getLocalMap ();
3616 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking precomputed diagonal offsets as an
// unmanaged Kokkos view.
//
// With HAVE_TPETRA_DEBUG defined, throws std::runtime_error if diag's Map
// is not compatible with the row Map.  The diagonal is extracted with
// KokkosSparse::getDiagCopy from the local device matrix into a 1-D subview
// of the vector's local data over rows [0, myNumRows).
3624 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3629 Kokkos::MemoryUnmanaged>& offsets)
const
3631 typedef LocalOrdinal LO;
3633 #ifdef HAVE_TPETRA_DEBUG
3634 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3635 const map_type& rowMap = * (this->getRowMap ());
3638 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3639 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3640 "The input Vector's Map must be compatible with (in the sense of Map::"
3641 "isCompatible) the CrsMatrix's row Map.");
3642 #endif // HAVE_TPETRA_DEBUG
3652 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3655 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3657 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3658 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking precomputed diagonal offsets as a
// Teuchos::ArrayView; the work is done on the default host execution space.
//
// With HAVE_TPETRA_DEBUG defined, throws std::runtime_error if diag's Map
// is not compatible with the row Map.  For each local row the output is
// first set to zero; if the row's offset is not the invalid size_t value,
// the output is set to the packed value at (row start + offset).
3661 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3665 const Teuchos::ArrayView<const size_t>& offsets)
const
3667 using LO = LocalOrdinal;
3668 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
3671 #ifdef HAVE_TPETRA_DEBUG
3672 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3673 const map_type& rowMap = * (this->getRowMap ());
3676 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3677 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3678 "The input Vector's Map must be compatible with (in the sense of Map::"
3679 "isCompatible) the CrsMatrix's row Map.");
3680 #endif // HAVE_TPETRA_DEBUG
3692 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
// Wrap the caller's offsets in an unmanaged host view (no copy, no ownership).
3694 using host_offsets_view_type =
3695 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3696 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3697 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3699 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3700 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3701 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3703 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3704 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
// One iteration per local row; INV and h_offsets are captured by value, the
// rest by reference.
3705 Kokkos::parallel_for
3706 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3707 range_type (0, myNumRows),
3708 [&, INV, h_offsets] (
const LO lclRow) {
3709 lclVecHost1d(lclRow) = STS::zero ();
3710 if (h_offsets[lclRow] != INV) {
3711 auto curRowOffset = rowPtrsPackedHost (lclRow);
3712 lclVecHost1d(lclRow) =
3713 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
// leftScale: scales the matrix using the entries of the vector x.
//
// x's Map must be the same as either the range Map or the row Map,
// otherwise std::invalid_argument is thrown.  When x matches the range Map
// and the graph has an exporter, x is first transferred into a temporary
// vector on the row Map (doImport with the exporter, REPLACE mode);
// otherwise x is used directly through a non-owning RCP.
// The scaling is done on a read-only device view of the chosen vector and
// requires the matrix to be fill complete; if it is not,
// std::runtime_error is thrown.
3720 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3725 using ::Tpetra::Details::ProfilingRegion;
3726 using Teuchos::ArrayRCP;
3727 using Teuchos::ArrayView;
3728 using Teuchos::null;
3731 using Teuchos::rcpFromRef;
3733 const char tfecfFuncName[] =
"leftScale: ";
3735 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
// xp ends up pointing at the vector actually used for scaling.
3737 RCP<const vec_type> xp;
3738 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3741 auto exporter = this->getCrsGraphRef ().getExporter ();
3742 if (exporter.get () !=
nullptr) {
3743 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3744 tempVec->doImport (x, *exporter,
REPLACE);
3748 xp = rcpFromRef (x);
3751 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3752 xp = rcpFromRef (x);
3755 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3756 (
true, std::invalid_argument,
"x's Map must be the same as "
3757 "either the row Map or the range Map of the CrsMatrix.");
3760 if (this->isFillComplete()) {
3761 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3762 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3765 x_lcl_1d,
false,
false);
3769 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3770 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
// rightScale: scales the matrix using the entries of the vector x.
// x's Map must be the same as either the matrix's domain Map or its column
// Map; otherwise std::runtime_error is thrown. The matrix must be fill
// complete; otherwise std::runtime_error is thrown.
// NOTE(review): the signature and the actual scaling call are on lines not
// visible in this excerpt; only its trailing arguments are shown.
3775 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3780 using ::Tpetra::Details::ProfilingRegion;
3781 using Teuchos::ArrayRCP;
3782 using Teuchos::ArrayView;
3783 using Teuchos::null;
3786 using Teuchos::rcpFromRef;
3788 const char tfecfFuncName[] =
"rightScale: ";
3790 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
// xp will refer to the vector actually used for scaling: either x itself
// or a redistributed temporary.
3792 RCP<const vec_type> xp;
3793 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
// x lives on the domain Map. If the graph has an Import object, bring x
// into a temporary vector on the column Map with REPLACE combine mode.
3796 auto importer = this->getCrsGraphRef ().getImporter ();
3797 if (importer.get () !=
nullptr) {
3798 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3799 tempVec->doImport (x, *importer,
REPLACE);
// No redistribution needed: use x directly (non-owning reference).
3803 xp = rcpFromRef (x);
3806 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
3807 xp = rcpFromRef (x);
// x matches neither Map: unconditional throw.
3809 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3810 (
true, std::runtime_error,
"x's Map must be the same as "
3811 "either the domain Map or the column Map of the CrsMatrix.");
3814 if (this->isFillComplete()) {
// Read-only device view of xp, restricted to its first column.
3815 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3816 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3819 x_lcl_1d,
false,
false);
// Not fill complete: unconditional throw.
3823 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3824 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
// Returns the square root of the sum, over all processes in the matrix's
// communicator, of the squared magnitudes of the locally stored values
// (i.e. a Frobenius-norm computation).
// NOTE(review): the signature and the declarations of mySum / totalSum are
// on lines not visible here; the function name is presumably
// getFrobeniusNorm -- confirm against the class declaration.
3829 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3834 using Teuchos::ArrayView;
3835 using Teuchos::outArg;
3836 using Teuchos::REDUCE_SUM;
3837 using Teuchos::reduceAll;
// Skip the local accumulation entirely when this process stores nothing.
3845 if (getLocalNumEntries() > 0) {
3846 if (isStorageOptimized ()) {
// Optimized storage: one pass over the packed values array.
3849 const size_t numEntries = getLocalNumEntries ();
3850 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3851 for (
size_t k = 0; k < numEntries; ++k) {
3852 auto val = values[k];
3856 const mag_type val_abs = STS::abs (val);
3857 mySum += val_abs * val_abs;
// Otherwise: visit each local row and only its first numEntries values.
3861 const LocalOrdinal numRows =
3862 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3863 for (LocalOrdinal r = 0; r < numRows; ++r) {
3864 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3865 const size_t numEntries = rowInfo.numEntries;
3866 auto A_r = this->getValuesViewHost(rowInfo);
3867 for (
size_t k = 0; k < numEntries; ++k) {
// NOTE(review): the line defining val for this loop is not visible here.
3869 const mag_type val_abs = STS::abs (val);
3870 mySum += val_abs * val_abs;
// Sum the per-process partial sums, then take the square root.
3876 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3877 mySum, outArg (totalSum));
3878 return STM::sqrt (totalSum);
// replaceColMap: forwards newColMap to the matrix's own graph.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix
// has a const graph that it is not allowed to modify.
// NOTE(review): the signature lines are not visible in this excerpt.
3881 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3886 const char tfecfFuncName[] =
"replaceColMap: ";
3890 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3891 myGraph_.is_null (), std::runtime_error,
3892 "This method does not work if the matrix has a const graph. The whole "
3893 "idea of a const graph is that you are not allowed to change it, but "
3894 "this method necessarily must modify the graph, since the graph owns "
3895 "the matrix's column Map.");
3896 myGraph_->replaceColMap (newColMap);
// reindexColumns: operates on either the caller-supplied graph or, when
// that pointer is null, on the matrix's own graph. Throws
// std::invalid_argument if the supplied graph is null and the matrix does
// not own a graph. When sortEachRow is set and the graph is locally
// indexed but not sorted, each row's column indices and values are sorted
// together and the graph is then marked as sorted.
// NOTE(review): the first parameter ("graph") and the call that performs
// the reindexing itself are on lines not visible in this excerpt.
3899 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3903 const Teuchos::RCP<const map_type>& newColMap,
3904 const Teuchos::RCP<const import_type>& newImport,
3905 const bool sortEachRow)
3907 const char tfecfFuncName[] =
"reindexColumns: ";
3908 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3909 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3910 "The input graph is null, but the matrix does not own its graph.");
// Prefer the caller's graph; fall back to the matrix's own graph.
3912 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
// NOTE(review): sortGraph is presumably passed to the graph's reindexing
// call so that the graph does not sort on its own -- that call is not
// visible here.
3913 const bool sortGraph =
false;
3917 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3918 const LocalOrdinal lclNumRows =
3919 static_cast<LocalOrdinal
> (theGraph.getLocalNumRows ());
3921 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3923 const RowInfo rowInfo = theGraph.getRowInfo (row);
3924 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
3925 auto vals = this->getValuesViewHostNonConst (rowInfo);
// Sort the first numEntries column indices of the row; the remaining
// sort2 argument(s) are on a line not shown.
3927 sort2 (lclColInds.data (),
3928 lclColInds.data () + rowInfo.numEntries,
3931 theGraph.indicesAreSorted_ =
true;
// replaceDomainMap: forwards newDomainMap to the matrix's own graph.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix
// has a const graph that it is not allowed to modify.
// NOTE(review): the signature lines are not visible in this excerpt.
3935 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3940 const char tfecfFuncName[] =
"replaceDomainMap: ";
3941 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3942 myGraph_.is_null (), std::runtime_error,
3943 "This method does not work if the matrix has a const graph. The whole "
3944 "idea of a const graph is that you are not allowed to change it, but this"
3945 " method necessarily must modify the graph, since the graph owns the "
3946 "matrix's domain Map and Import objects.");
3947 myGraph_->replaceDomainMap (newDomainMap);
// replaceDomainMapAndImporter: forwards newDomainMap and newImporter to
// the matrix's own graph.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix
// has a const graph that it is not allowed to modify.
// NOTE(review): the start of the signature (including the newDomainMap
// parameter) is not visible in this excerpt.
3950 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3954 Teuchos::RCP<const import_type>& newImporter)
3956 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3958 myGraph_.is_null (), std::runtime_error,
3959 "This method does not work if the matrix has a const graph. The whole "
3960 "idea of a const graph is that you are not allowed to change it, but this"
3961 " method necessarily must modify the graph, since the graph owns the "
3962 "matrix's domain Map and Import objects.");
3963 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
// replaceRangeMap: forwards newRangeMap to the matrix's own graph.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix
// has a const graph that it is not allowed to modify.
// The error text names the range Map and Export objects, since those are
// what this method changes (it previously repeated the domain-Map wording).
// NOTE(review): the signature lines are not visible in this excerpt.
3966 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3971 const char tfecfFuncName[] =
"replaceRangeMap: ";
3972 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3973 myGraph_.is_null (), std::runtime_error,
3974 "This method does not work if the matrix has a const graph. The whole "
3975 "idea of a const graph is that you are not allowed to change it, but this"
3976 " method necessarily must modify the graph, since the graph owns the "
3977 "matrix's range Map and Export objects.");
3978 myGraph_->replaceRangeMap (newRangeMap);
// replaceRangeMapAndExporter: forwards newRangeMap and newExporter to the
// matrix's own graph.
// Throws std::runtime_error when myGraph_ is null, i.e. when the matrix
// has a const graph that it is not allowed to modify.
// The error text names the range Map and Export objects, since those are
// what this method changes (it previously repeated the domain-Map wording).
// NOTE(review): the start of the signature (including the newRangeMap
// parameter) is not visible in this excerpt.
3981 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3985 Teuchos::RCP<const export_type>& newExporter)
3987 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
3988 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3989 myGraph_.is_null (), std::runtime_error,
3990 "This method does not work if the matrix has a const graph. The whole "
3991 "idea of a const graph is that you are not allowed to change it, but this"
3992 " method necessarily must modify the graph, since the graph owns the "
3993 "matrix's range Map and Export objects.");
3994 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
// Appends (column index, value) pairs for the global row globalRow to the
// nonlocals_ container, creating the row's entry if it does not exist yet.
// Entries are only appended here; no sorting or duplicate merging happens
// in the visible code.
// NOTE(review): the start of the signature (including the globalRow
// parameter and the function name) is not visible in this excerpt; the
// name is presumably insertNonownedGlobalValues -- confirm.
3997 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4001 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4002 const Teuchos::ArrayView<const Scalar>& values)
4004 using Teuchos::Array;
4005 typedef GlobalOrdinal GO;
4006 typedef typename Array<GO>::size_type size_type;
4008 const size_type numToInsert = indices.size ();
// operator[] default-constructs an empty (indices, values) pair for a row
// seen for the first time.
4011 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4012 Array<GO>& curRowInds = curRow.first;
4013 Array<Scalar>& curRowVals = curRow.second;
// Reserve once up front so the push_back loop does not reallocate.
4014 const size_type newCapacity = curRowInds.size () + numToInsert;
4015 curRowInds.reserve (newCapacity);
4016 curRowVals.reserve (newCapacity);
// NOTE(review): values[k] is read for every k < indices.size(); this
// assumes values is at least as long as indices -- confirm with callers.
4017 for (size_type k = 0; k < numToInsert; ++k) {
4018 curRowInds.push_back (indices[k]);
4019 curRowVals.push_back (values[k]);
// globalAssemble: moves the entries accumulated in nonlocals_ into this
// matrix. The visible steps are:
//   1. sort and merge the stored entries of each nonlocal row;
//   2. build a temporary matrix ("nonlocalMatrix") holding those rows;
//   3. redistribute it into this matrix with combine mode ADD, either
//      directly (row Map is one-to-one) or through an intermediate
//      one-to-one matrix;
//   4. release nonlocals_;
//   5. throw std::runtime_error on all processes if any process reported
//      an incomplete transfer.
// Also throws std::runtime_error if fill is not active.
// This routine performs collective reductions over the matrix's
// communicator (reduceAll calls below).
// NOTE(review): several lines (signature, "if (verbose)" guards, closing
// braces, the conditions guarding "isLocallyComplete = 0") are not visible
// in this excerpt.
4023 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4030 using Teuchos::Comm;
4031 using Teuchos::outArg;
4034 using Teuchos::REDUCE_MAX;
4035 using Teuchos::REDUCE_MIN;
4036 using Teuchos::reduceAll;
4040 typedef GlobalOrdinal GO;
4041 typedef typename Teuchos::Array<GO>::size_type size_type;
4042 const char tfecfFuncName[] =
"globalAssemble: ";
4043 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
// Optional diagnostic output, written to std::cerr.
4045 const bool verbose = Behavior::verbose(
"CrsMatrix");
4046 std::unique_ptr<std::string> prefix;
4048 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4049 std::ostringstream os;
4050 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4052 std::cerr << os.str();
4054 RCP<const Comm<int> > comm = getComm ();
4056 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4057 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4058 "you may call this method.");
4060 const size_t myNumNonlocalRows = nonlocals_.size ();
// Collective check (max-reduction of a 0/1 flag): does any process hold
// nonlocal rows?
4067 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4068 int someoneHasNonlocalRows = 0;
4069 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4070 outArg (someoneHasNonlocalRows));
// NOTE(review): the body of this branch is not visible; presumably an
// early return when nobody has nonlocal rows -- confirm.
4071 if (someoneHasNonlocalRows == 0) {
// Step 1: collect the nonlocal row indices and, per row, sort the
// (column, value) pairs by column and merge duplicates, recording the
// resulting entry count.
4085 RCP<const map_type> nonlocalRowMap;
4086 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4088 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4089 size_type curPos = 0;
4090 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4091 ++mapIter, ++curPos) {
4092 myNonlocalGblRows[curPos] = mapIter->first;
4095 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4096 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4103 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4104 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4105 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4106 merge2 (gblCols_newEnd, vals_newEnd,
4107 gblCols.begin (), gblCols.end (),
4108 vals.begin (), vals.end ());
// Drop the tail left over after merging.
4109 gblCols.erase (gblCols_newEnd, gblCols.end ());
4110 vals.erase (vals_newEnd, vals.end ());
4111 numEntPerNonlocalRow[curPos] = gblCols.size ();
// The index base of the temporary row Map is the global minimum of all
// nonlocal row indices; processes without rows contribute the largest GO.
4122 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4124 auto iter = std::min_element (myNonlocalGblRows.begin (),
4125 myNonlocalGblRows.end ());
4126 if (iter != myNonlocalGblRows.end ()) {
4127 myMinNonlocalGblRow = *iter;
4130 GO gblMinNonlocalGblRow = 0;
4131 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4132 outArg (gblMinNonlocalGblRow));
4133 const GO indexBase = gblMinNonlocalGblRow;
4134 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4135 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4144 std::ostringstream os;
4145 os << *prefix <<
"Create nonlocal matrix" << endl;
4146 std::cerr << os.str();
// Step 2: temporary matrix whose rows are this process's nonlocal rows.
4148 RCP<crs_matrix_type> nonlocalMatrix =
4149 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4151 size_type curPos = 0;
4152 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4153 ++mapIter, ++curPos) {
4154 const GO gblRow = mapIter->first;
4156 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4157 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4159 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
// Step 3: redistribute into this matrix.
4171 auto origRowMap = this->getRowMap ();
4172 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
// Cleared to 0 below under a condition not visible in this excerpt.
4174 int isLocallyComplete = 1;
4176 if (origRowMapIsOneToOne) {
4178 std::ostringstream os;
4179 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4180 std::cerr << os.str();
// One-to-one row Map: a single Export with ADD suffices.
4182 export_type exportToOrig (nonlocalRowMap, origRowMap);
4184 isLocallyComplete = 0;
4187 std::ostringstream os;
4188 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4189 std::cerr << os.str();
4191 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4196 std::ostringstream os;
4197 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4198 std::cerr << os.str();
// Row Map is not one-to-one: first Export into a matrix on a one-to-one
// row Map, then Import from that matrix into this one.
// NOTE(review): the construction of oneToOneRowMap is not visible here.
4205 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4207 isLocallyComplete = 0;
4215 std::ostringstream os;
4216 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4218 std::cerr << os.str();
4220 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4222 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4228 std::ostringstream os;
4229 os << *prefix <<
"Free nonlocalMatrix" << endl;
4230 std::cerr << os.str();
// Release the temporary before the next communication step.
4232 nonlocalMatrix = Teuchos::null;
4236 std::ostringstream os;
4237 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4238 std::cerr << os.str();
4240 import_type importToOrig (oneToOneRowMap, origRowMap);
4241 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4249 std::ostringstream os;
4250 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4251 std::cerr << os.str();
// Step 4: swap with an empty container so nonlocals_ ends up empty and its
// old contents are destroyed with newNonlocals.
4253 decltype (nonlocals_) newNonlocals;
4254 std::swap (nonlocals_, newNonlocals);
// Step 5: min-reduction, so a single process reporting 0 makes every
// process throw.
4263 int isGloballyComplete = 0;
4264 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4265 outArg (isGloballyComplete));
4266 TEUCHOS_TEST_FOR_EXCEPTION
4267 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4268 "you called insertGlobalValues with a global row index which is not in "
4269 "the matrix's row Map on any process in its communicator.");
// Puts the matrix back into a not-fill-complete state: if the matrix owns
// its graph, the graph's resumeFill is called with the same params; the
// cached apply helper is reset (only for the KokkosKernels versions
// selected by the #if); and fillComplete_ is cleared.
// NOTE(review): the signature and the matching #endif are not visible in
// this excerpt; the name is presumably resumeFill -- confirm.
4272 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4277 if (! isStaticGraph ()) {
4278 myGraph_->resumeFill (params);
4280 #if KOKKOSKERNELS_VERSION >= 40299
4282 applyHelper.reset();
4284 fillComplete_ =
false;
// Returns whatever the matrix's graph reports from haveGlobalConstants().
// NOTE(review): the signature is not visible in this excerpt; the name is
// presumably haveGlobalConstants -- confirm.
4287 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4291 return getCrsGraphRef ().haveGlobalConstants ();
// fillComplete(params): convenience overload that uses the graph's row Map
// as both the domain Map and the range Map and forwards to the three-
// argument fillComplete.
// Throws std::logic_error if getCrsGraph() returns null.
// NOTE(review): the signature and the definition of the local "graph" are
// on lines not visible in this excerpt.
4294 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4299 const char tfecfFuncName[] =
"fillComplete(params): ";
4301 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4302 (this->getCrsGraph ().is_null (), std::logic_error,
4303 "getCrsGraph() returns null. This should not happen at this point. "
4304 "Please report this bug to the Tpetra developers.");
// Domain Map == range Map == row Map of the graph.
4314 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4315 Teuchos::RCP<const map_type> domainMap = rangeMap;
4316 this->fillComplete (domainMap, rangeMap, params);
// fillComplete(domainMap, rangeMap, params): finishes matrix assembly and
// marks the matrix fill complete.
// Parameters read from params (when non-null):
//   "No Nonlocal Changes"          - skip globalAssemble when true;
//   "sort column map ghost gids" / "Sort column Map ghost GIDs"
//                                  - forwarded to the owned graph;
//   "compute global constants"     - default true; controls whether the
//                                    graph's computeGlobalConstants runs.
// Throws std::runtime_error if fill is not active or the matrix is already
// fill complete, if nonlocal entries exist on a one-process run, or if
// makeIndicesLocal reports an error. In debug builds with a const graph it
// also throws when the given Maps differ from the graph's.
// NOTE(review): the first parameter, several "if (verbose)" guards, else
// branches and closing braces are on lines not visible in this excerpt.
4320 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4324 const Teuchos::RCP<const map_type>& rangeMap,
4325 const Teuchos::RCP<Teuchos::ParameterList>& params)
4329 using Teuchos::ArrayRCP;
4333 const char tfecfFuncName[] =
"fillComplete: ";
4334 ProfilingRegion regionFillComplete
4335 (
"Tpetra::CrsMatrix::fillComplete");
4336 const bool verbose = Behavior::verbose(
"CrsMatrix");
4337 std::unique_ptr<std::string> prefix;
4339 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4340 std::ostringstream os;
4341 os << *prefix << endl;
4342 std::cerr << os.str ();
// NOTE(review): the label below is spelled "fillCompete" (sic). It is a
// runtime string, so it is left untouched here; confirm whether any tool
// matches on it before correcting the spelling.
4345 "Tpetra::CrsMatrix::fillCompete",
4348 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4349 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4350 "Matrix fill state must be active (isFillActive() "
4351 "must be true) before you may call fillComplete().");
4352 const int numProcs = this->getComm ()->getSize ();
// Defaults, possibly overridden by params below.
4362 bool assertNoNonlocalInserts =
false;
4365 bool sortGhosts =
true;
4367 if (! params.is_null ()) {
4368 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4369 assertNoNonlocalInserts);
// Two spellings of the same option are accepted; the lower-case one is
// checked first.
4370 if (params->isParameter (
"sort column map ghost gids")) {
4371 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4373 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4374 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
// Global assembly is needed only with more than one process and when the
// caller has not promised that there were no nonlocal inserts.
4379 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4381 if (! this->myGraph_.is_null ()) {
4382 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
// Allocate storage if nothing has been inserted yet; local vs. global
// indexing is chosen by whether a column Map already exists.
4385 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4386 if (this->hasColMap ()) {
4387 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4390 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4395 if (needGlobalAssemble) {
4396 this->globalAssemble ();
4399 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4400 (numProcs == 1 && nonlocals_.size() > 0,
4401 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4402 "An invalid entry (i.e., with row index not in the row Map) must have "
4403 "been submitted to the CrsMatrix.");
// Const-graph path: the graph cannot be modified, so only the values are
// finalized (fillLocalMatrix).
4406 if (this->isStaticGraph ()) {
4414 #ifdef HAVE_TPETRA_DEBUG
4432 const bool domainMapsMatch =
4433 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4434 const bool rangeMapsMatch =
4435 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4437 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4438 (! domainMapsMatch, std::runtime_error,
4439 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4440 "The graph cannot be changed because it was given to the CrsMatrix "
4441 "constructor as const. You can fix this by passing in the graph's "
4442 "domain Map and range Map to the matrix's fillComplete call.");
4444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4445 (! rangeMapsMatch, std::runtime_error,
4446 "The CrsMatrix's range Map does not match the graph's range Map. "
4447 "The graph cannot be changed because it was given to the CrsMatrix "
4448 "constructor as const. You can fix this by passing in the graph's "
4449 "domain Map and range Map to the matrix's fillComplete call.");
4450 #endif // HAVE_TPETRA_DEBUG
4454 this->fillLocalMatrix (params);
// Owned-graph path: set Maps, build the column Map if there is none, make
// indices local, sort/merge, build Import/Export, then finalize graph and
// values together.
4462 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4465 Teuchos::Array<int> remotePIDs (0);
4466 const bool mustBuildColMap = ! this->hasColMap ();
4467 if (mustBuildColMap) {
4468 this->myGraph_->makeColMap (remotePIDs);
// makeIndicesLocal returns (error count, message); nonzero count throws.
4473 const std::pair<size_t, std::string> makeIndicesLocalResult =
4474 this->myGraph_->makeIndicesLocal(verbose);
4479 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4480 (makeIndicesLocalResult.first != 0, std::runtime_error,
4481 makeIndicesLocalResult.second);
4483 const bool sorted = this->myGraph_->isSorted ();
4484 const bool merged = this->myGraph_->isMerged ();
4485 this->sortAndMergeIndicesAndValues (sorted, merged);
4490 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4494 this->fillLocalGraphAndMatrix (params);
// Global constants are computed unless params explicitly disables them.
4496 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4497 params->get (
"compute global constants",
true);
4498 if (callGraphComputeGlobalConstants) {
4499 this->myGraph_->computeGlobalConstants ();
4502 this->myGraph_->computeLocalConstants ();
4504 this->myGraph_->fillComplete_ =
true;
4505 this->myGraph_->checkInternalState ();
4510 this->fillComplete_ =
true;
// NOTE(review): same "fillCompete" (sic) label spelling as above.
4513 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4515 this->checkInternalState ();
// expertStaticFillComplete: completes fill using Maps and Import/Export
// objects supplied by the caller. It forwards all five arguments to the
// owned graph's expertStaticFillComplete, calls fillLocalGraphAndMatrix,
// marks the matrix fill complete, and runs checkInternalState.
// Throws std::runtime_error if fill is not active or the matrix is already
// fill complete, and std::logic_error if myGraph_ is null. Debug builds
// additionally verify the final fill state.
// When HAVE_TPETRA_MMM_TIMINGS is defined, the "Timer Label" entry of
// params (if params is non-null) is used to build the timer name prefix.
// NOTE(review): the first parameter (domainMap), the declaration of
// "label", and the #endif lines of the timing sections are not visible in
// this excerpt.
4519 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4523 const Teuchos::RCP<const map_type> & rangeMap,
4524 const Teuchos::RCP<const import_type>& importer,
4525 const Teuchos::RCP<const export_type>& exporter,
4526 const Teuchos::RCP<Teuchos::ParameterList> &params)
4528 #ifdef HAVE_TPETRA_MMM_TIMINGS
4530 if(!params.is_null())
4531 label = params->get(
"Timer Label",label);
4532 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4533 using Teuchos::TimeMonitor;
4535 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4538 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4539 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4540 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4541 "must be true) before calling fillComplete().");
4542 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4543 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4546 #ifdef HAVE_TPETRA_MMM_TIMINGS
4547 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
// Let the graph complete its own fill with the caller-provided objects.
4550 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4554 #ifdef HAVE_TPETRA_MMM_TIMINGS
4555 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4558 fillLocalGraphAndMatrix (params);
4563 fillComplete_ =
true;
// Debug builds only: the matrix must now be fill complete and must no
// longer be fill active. Each message reports the condition it checks.
4566 #ifdef HAVE_TPETRA_DEBUG
4567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4568 ": We're at the end of fillComplete(), but isFillActive() is true. "
4569 "Please report this bug to the Tpetra developers.");
4570 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4571 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4572 "Please report this bug to the Tpetra developers.");
4573 #endif // HAVE_TPETRA_DEBUG
4575 #ifdef HAVE_TPETRA_MMM_TIMINGS
4576 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4579 checkInternalState();
// Scans the rowLen column indices starting at cols, comparing each entry
// with the one at the current output position (newend), and returns
// newend - beg.
// NOTE(review): the signature and the body of the comparison (how values
// are combined and how newend advances) are on lines not visible here.
// Presumably this is mergeRowIndicesAndValues, collapsing runs of equal
// column indices in a sorted row -- confirm.
4583 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4589 LocalOrdinal* beg = cols;
4590 LocalOrdinal* end = cols + rowLen;
4591 LocalOrdinal* newend = beg;
// The scan starts at the second entry and compares against *newend.
4593 LocalOrdinal* cur = beg + 1;
4597 while (cur != end) {
4598 if (*cur != *newend) {
4615 return newend - beg;
// sortAndMergeIndicesAndValues: when the rows are not already both sorted
// and merged, processes every local row on the host: sorts the row's
// column indices together with its values, merges the row, stores the new
// row length, and accumulates the number of removed duplicates.
// Throws std::runtime_error if the graph is const ("static"), and
// std::logic_error if myGraph_ is null or storage is already optimized.
// NOTE(review): the signature, the definitions of rowBegins_, rowLengths_,
// cols_ and vals, and the code that consumes totalNumDups are on lines not
// visible in this excerpt.
4618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4623 using ::Tpetra::Details::ProfilingRegion;
4624 typedef LocalOrdinal LO;
4625 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4626 host_execution_space;
4627 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4628 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4629 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
// Nothing to do when the rows are already sorted and merged.
4631 if (! sorted || ! merged) {
4632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4633 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4634 "\"static\" (const) graph, since the matrix does not own the graph.");
4635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4636 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4637 "this matrix claims ! isStaticGraph(). "
4638 "Please report this bug to the Tpetra developers.");
4639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4640 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4641 "this method if the graph's storage has already been optimized. "
4642 "Please report this bug to the Tpetra developers.");
4645 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4646 size_t totalNumDups = 0;
4651 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
// Host reduction over local rows; the lambda captures by value.
4653 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4654 [=] (
const LO lclRow,
size_t& numDups) {
4655 size_t rowBegin = rowBegins_(lclRow);
4656 size_t rowLen = rowLengths_(lclRow);
4657 LO* cols = cols_.data() + rowBegin;
// Sort the indices and carry the values along.
4660 sort2 (cols, cols + rowLen, vals);
// Merge, record the shortened length, and count what was removed.
4663 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4664 rowLengths_(lclRow) = newRowLength;
4665 numDups += rowLen - newRowLength;
// Non-transpose sparse matrix-multivector product driver
// (Y_in := beta*Y_in + alpha*A*X_in, as suggested by the localApply calls).
// Visible structure:
//   - if the graph has an Import, X_in is first imported into a column-Map
//     multivector; otherwise X_in is used directly or copied;
//   - if the graph has an Export, the local product goes into a row-Map
//     temporary with beta = ZERO and is then exported into Y_in;
//   - otherwise the local product is applied to Y_in or to a row-Map copy;
//   - a locally replicated Y_in gets a final "Reduce Y" step.
// NOTE(review): the signature, several conditions, else branches, and the
// bodies of most branches are on lines not visible in this excerpt; the
// name is presumably applyNonTranspose -- confirm.
4678 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4689 using Teuchos::rcp_const_cast;
4690 using Teuchos::rcpFromRef;
4691 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4692 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
// Special-case handling of alpha == 0 (branch bodies not shown).
4698 if (alpha == ZERO) {
4701 }
else if (beta != ONE) {
4715 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4716 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// beta == 0 means the previous contents of Y_in do not contribute.
4722 const bool Y_is_overwritten = (beta ==
ZERO);
// Y_in is not distributed although the communicator has several processes.
4725 const bool Y_is_replicated =
4726 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
// Special handling on ranks other than 0 for a replicated Y_in (body not
// shown).
4734 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X_colMap: the input as seen on the column Map.
4741 RCP<const MV> X_colMap;
4742 if (importer.is_null ()) {
// No Import: either take a column-Map copy of X_in (second argument true)
// or reference X_in directly; the selecting condition is not shown.
4750 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4752 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4757 X_colMap = rcpFromRef (X_in);
4761 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
// With an Import: fetch a column-Map multivector and fill it from X_in.
4767 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4770 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4771 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4778 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
4785 if (! exporter.is_null ()) {
// With an Export: compute into the row-Map temporary with beta = ZERO;
// the combination with Y_in happens in the export step (not shown).
4786 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4788 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4794 if (Y_is_overwritten) {
// No Export: apply into a row-Map copy of Y_in, or into Y_in itself; the
// selecting condition is not shown.
4820 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4827 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4831 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4839 if (Y_is_replicated) {
4840 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
// Transpose / conjugate-transpose sparse matrix-multivector product driver.
// Visible structure:
//   - importMV_ / exportMV_ are cached temporaries on the column / row Map,
//     (re)created when missing;
//   - if the graph has an Export, X_in is first brought onto the row Map
//     via exportMV_->doImport with the exporter;
//   - if the graph has an Import, the local product goes into importMV_
//     (zeroed first) with beta = ZERO, followed by an export step;
//   - otherwise the local product is applied to Y_in or to a copy of it;
//   - a locally replicated Y_in gets a final "Reduce Y" step.
// NOTE(review): the signature, several conditions, else branches and
// branch bodies are on lines not visible in this excerpt; the name is
// presumably applyTranspose -- confirm.
4845 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4850 const Teuchos::ETransp mode,
4855 using Teuchos::null;
4858 using Teuchos::rcp_const_cast;
4859 using Teuchos::rcpFromRef;
4860 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
// Special-case handling of alpha == 0 and beta == 0 (bodies not shown).
4863 if (alpha == ZERO) {
4876 else if (beta == ZERO) {
4894 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4895 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4900 const bool Y_is_replicated = (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4901 const bool Y_is_overwritten = (beta ==
ZERO);
4902 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X is either a deep copy of X_in or a non-owning reference to it; the
// selecting condition is not shown.
4908 X = rcp (
new MV (X_in, Teuchos::Copy));
4910 X = rcpFromRef (X_in);
// Cached column-Map temporary: checked against the current number of
// vectors (reaction not shown) and allocated when absent.
4914 if (importer != Teuchos::null) {
4915 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4918 if (importMV_ == null) {
4919 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
// Cached row-Map temporary, handled the same way.
4922 if (exporter != Teuchos::null) {
4923 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4926 if (exportMV_ == null) {
4927 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
// In transpose mode the Export object is used to bring X_in onto the row
// Map (doImport called with the exporter).
4933 if (! exporter.is_null ()) {
4934 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4935 exportMV_->doImport (X_in, *exporter,
INSERT);
4942 if (importer != Teuchos::null) {
4943 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
// Zero the temporary, then compute into it with beta = ZERO.
4950 importMV_->putScalar (ZERO);
4952 this->localApply (*X, *importMV_, mode, alpha, ZERO);
4954 if (Y_is_overwritten) {
// No Import: apply into a deep copy of Y_in, or into Y_in itself; the
// selecting condition is not shown.
4971 MV Y (Y_in, Teuchos::Copy);
4972 this->localApply (*X, Y, mode, alpha, beta);
4975 this->localApply (*X, Y_in, mode, alpha, beta);
4982 if (Y_is_replicated) {
4983 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
// localApply: process-local sparse matrix-multivector product,
// Y := beta*Y + alpha*op(A)*X, where op is selected by mode (NO_TRANS,
// TRANS or CONJ_TRANS). No communication is visible in this routine.
// Precondition checks, each throwing std::runtime_error:
//   - NO_TRANS: X has as many local rows as the column Map has elements,
//     and Y as many as the row Map;
//   - TRANS / CONJ_TRANS: X has as many local rows as the row Map, and Y
//     as many as the column Map;
//   - the matrix is fill complete;
//   - X and Y are constant stride;
//   - X and Y do not alias (same non-null data pointer with nonzero
//     extent).
// An unrecognized mode throws std::invalid_argument.
// The transpose-mode check against the column Map now reports Y in its
// message; it previously said X, matching the row-Map check's text.
// NOTE(review): the first two parameters, the conditions of most checks,
// the definitions of X_lcl / Y_lcl, several #else/#endif lines and the
// spmv calls themselves are on lines not visible in this excerpt.
4988 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4993 const Teuchos::ETransp mode,
4994 const Scalar& alpha,
4995 const Scalar& beta)
const
4998 using Teuchos::NO_TRANS;
4999 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5006 const char tfecfFuncName[] =
"localApply: ";
5007 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5011 const bool transpose = (mode != Teuchos::NO_TRANS);
// NO_TRANS: X is checked against the column Map, Y against the row Map.
5012 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5014 getColMap ()->getLocalNumElements (), std::runtime_error,
5015 "NO_TRANS case: X has the wrong number of local rows. "
5017 "getColMap()->getLocalNumElements() = " <<
5018 getColMap ()->getLocalNumElements () <<
".");
5019 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5021 getRowMap ()->getLocalNumElements (), std::runtime_error,
5022 "NO_TRANS case: Y has the wrong number of local rows. "
5024 "getRowMap()->getLocalNumElements() = " <<
5025 getRowMap ()->getLocalNumElements () <<
".");
// TRANS / CONJ_TRANS: the roles swap -- X against the row Map, Y against
// the column Map.
5026 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5028 getRowMap ()->getLocalNumElements (), std::runtime_error,
5029 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5031 <<
" != getRowMap()->getLocalNumElements() = "
5032 << getRowMap ()->getLocalNumElements () <<
".");
5033 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5035 getColMap ()->getLocalNumElements (), std::runtime_error,
5036 "TRANS or CONJ_TRANS case: Y has the wrong number of local "
5038 <<
" != getColMap()->getLocalNumElements() = "
5039 << getColMap ()->getLocalNumElements () <<
".");
5040 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5041 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5042 "fill complete. You must call fillComplete() (possibly with "
5043 "domain and range Map arguments) without an intervening "
5044 "resumeFill() call before you may call this method.");
5045 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5047 std::runtime_error,
"X and Y must be constant stride.");
// Aliasing check: identical non-null data pointers with nonzero extent.
5052 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5053 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5054 && X_lcl.extent(0) != 0,
5055 std::runtime_error,
"X and Y may not alias one another.");
// Newer KokkosKernels: a lazily created helper object (applyHelper) holds
// the spmv handle(s) and is reused across calls.
5058 #if KOKKOSKERNELS_VERSION >= 40299
5059 auto A_lcl = getLocalMatrixDevice();
5061 if(!applyHelper.get()) {
// The merge-path algorithm is only considered under the cuSPARSE TPL on
// the CUDA execution space; the enabling threshold is not shown.
5064 bool useMergePath =
false;
5065 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
5071 if constexpr(std::is_same_v<execution_space, Kokkos::Cuda>) {
5072 LocalOrdinal nrows = getLocalNumRows();
5073 LocalOrdinal maxRowImbalance = 0;
// Imbalance = longest row minus the average row length.
// NOTE(review): this divides by nrows; presumably guarded by an
// "nrows != 0" test on a line not visible here -- confirm.
5075 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5078 useMergePath =
true;
5081 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
5082 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
// Translate the Teuchos mode enum into the KokkosSparse mode string.
5086 const char* modeKK =
nullptr;
5089 case Teuchos::NO_TRANS:
5090 modeKK = KokkosSparse::NoTranspose;
break;
5091 case Teuchos::TRANS:
5092 modeKK = KokkosSparse::Transpose;
break;
5093 case Teuchos::CONJ_TRANS:
5094 modeKK = KokkosSparse::ConjugateTranspose;
break;
5096 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
// Two handle variants: one for a matrix with int row pointers, one for
// the matrix as stored.
5099 if(applyHelper->shouldUseIntRowptrs())
5101 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5103 &applyHelper->handle_int, modeKK,
5109 &applyHelper->handle, modeKK,
// Older KokkosKernels: go through the local multiply operator, choosing
// the imbalanced-rows variant under a condition not shown here.
5113 LocalOrdinal nrows = getLocalNumRows();
5114 LocalOrdinal maxRowImbalance = 0;
5116 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5118 auto matrix_lcl = getLocalMultiplyOperator();
5120 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5122 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
// apply: public entry point for the sparse matrix-multivector product.
// Dispatches to applyNonTranspose when mode is NO_TRANS and to
// applyTranspose (passing mode along) otherwise.
// Throws std::runtime_error if the matrix is not fill complete.
// NOTE(review): the remaining signature lines (X, Y, alpha, beta) are not
// visible in this excerpt.
5126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5131 Teuchos::ETransp mode,
5136 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5138 TEUCHOS_TEST_FOR_EXCEPTION
5139 (! isFillComplete (), std::runtime_error,
5140 fnName <<
": Cannot call apply() until fillComplete() "
5141 "has been called.");
5143 if (mode == Teuchos::NO_TRANS) {
5144 ProfilingRegion regionNonTranspose (fnName);
5145 this->applyNonTranspose (X, Y, alpha, beta);
// Any other mode (TRANS or CONJ_TRANS) takes the transpose path.
5148 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5149 this->applyTranspose (X, Y, mode, alpha, beta);
// convert: builds a new CrsMatrix with scalar type T from this matrix. The
// new matrix is constructed from this matrix's graph (getCrsGraph()), its
// values are filled by copyConvert from this matrix's values, and it is
// fill-completed with this matrix's domain and range Maps.
// Throws std::runtime_error if this matrix is not fill complete.
// NOTE(review): the "template <class T>" line, the function name line, the
// output_matrix_type alias and the return statement are not visible in
// this excerpt.
5154 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5156 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5162 const char tfecfFuncName[] =
"convert: ";
5164 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5165 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5166 "of the conversion) is not fill complete. You must first call "
5167 "fillComplete() (possibly with the domain and range Map) without an "
5168 "intervening call to resumeFill(), before you may call this method.");
// The new matrix is constructed from this matrix's graph.
5170 RCP<output_matrix_type> newMatrix
5171 (
new output_matrix_type (this->getCrsGraph ()));
// Element-wise conversion of the device values: destination first, source
// second.
5175 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5176 this->getLocalMatrixDevice ().values);
5180 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
// checkInternalState: consistency checks on the matrix's members. Each
// failed check throws std::logic_error with a "please report" message:
//   - staticGraph_ must not be null;
//   - if myGraph_ is set, it must be the same object as staticGraph_;
//   - a fill-complete matrix must have a fill-complete graph;
//   - if the graph has allocated indices, a nonzero local allocation size
//     and at least one local row, valuesUnpacked_wdv must not be empty.
// NOTE(review): the signature and any enclosing debug-mode guard are not
// visible in this excerpt.
5186 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5193 const char tfecfFuncName[] =
"checkInternalState: ";
5194 const char err[] =
"Internal state is not consistent. "
5195 "Please report this bug to the Tpetra developers.";
5199 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5200 (staticGraph_.is_null (), std::logic_error, err);
5204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5205 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5206 std::logic_error, err);
5208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5209 (isFillComplete () && ! staticGraph_->isFillComplete (),
5210 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5211 "but its graph is NOT fill complete.");
// Indices allocated with nonzero size but no values storage is
// inconsistent.
5214 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5215 (staticGraph_->indicesAreAllocated () &&
5216 staticGraph_->getLocalAllocationSize() > 0 &&
5217 staticGraph_->getLocalNumRows() > 0 &&
5218 valuesUnpacked_wdv.extent (0) == 0,
5219 std::logic_error, err);
// One-line textual summary of the matrix, assembled in a std::ostringstream.
// NOTE(review): the function name and return statement are not visible in
// this excerpt; presumably this is CrsMatrix::description() -- confirm.
//
// Output always starts with "Tpetra::CrsMatrix (Kokkos refactor): {" and
// includes the object label when it is non-empty. A fill-complete matrix
// also reports global dimensions and the global number of entries; a
// matrix that is not fill complete reports only the global dimensions.
5223 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5228 std::ostringstream os;
5230 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
// The label is optional; skip it when empty.
5231 if (this->getObjectLabel () !=
"") {
5232 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5234 if (isFillComplete ()) {
5235 os <<
"isFillComplete: true"
5236 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5237 << getGlobalNumCols () <<
"]"
5238 <<
", global number of entries: " << getGlobalNumEntries ()
// Not fill complete: only the global dimensions are printed.
5242 os <<
"isFillComplete: false"
5243 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5244 << getGlobalNumCols () <<
"]}";
// Verbose, multi-process description of the matrix written to `out`.
// NOTE(review): the function name and the `out` parameter declaration are
// not visible in this excerpt; presumably this is CrsMatrix::describe().
//
// verbLevel == VERB_DEFAULT is treated as VERB_LOW. The visible code tests
// for VERB_NONE first, then prints a header (label, template parameters,
// fill-complete state, global sizes), then tests `vl < VERB_MEDIUM` before
// the Map section and per-process entry counts, then tests `vl < VERB_HIGH`
// before the per-row listing. (Index,Value) pairs are printed only at
// VERB_EXTREME. What each test does when true is not visible here.
5249 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5253 const Teuchos::EVerbosityLevel verbLevel)
const
5257 using Teuchos::ArrayView;
5258 using Teuchos::Comm;
5260 using Teuchos::TypeNameTraits;
5261 using Teuchos::VERB_DEFAULT;
5262 using Teuchos::VERB_NONE;
5263 using Teuchos::VERB_LOW;
5264 using Teuchos::VERB_MEDIUM;
5265 using Teuchos::VERB_HIGH;
5266 using Teuchos::VERB_EXTREME;
// Resolve the default verbosity to VERB_LOW.
5268 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5270 if (vl == VERB_NONE) {
5275 Teuchos::OSTab tab0 (out);
5277 RCP<const Comm<int> > comm = this->getComm();
5278 const int myRank = comm->getRank();
5279 const int numProcs = comm->getSize();
// Column width for the per-row table: this loop runs once per power of ten
// below the global row count; the result is clamped to at least 11, plus 2.
5281 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5284 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
5294 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5296 Teuchos::OSTab tab1 (out);
5299 if (this->getObjectLabel () !=
"") {
5300 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5303 out <<
"Template parameters:" << endl;
5304 Teuchos::OSTab tab2 (out);
5305 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5306 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5307 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5308 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
// Global entry statistics are printed only for a fill-complete matrix.
5310 if (isFillComplete()) {
5311 out <<
"isFillComplete: true" << endl
5312 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5313 << getGlobalNumCols () <<
"]" << endl
5314 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5315 << endl <<
"Global max number of entries in a row: "
5316 << getGlobalMaxNumRowEntries () << endl;
5319 out <<
"isFillComplete: false" << endl
5320 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5321 << getGlobalNumCols () <<
"]" << endl;
5325 if (vl < VERB_MEDIUM) {
// Map section. Each Map is either reported as null, reported as identical to
// a previously printed Map (pointer comparison), or described in full.
5331 out << endl <<
"Row Map:" << endl;
5333 if (getRowMap ().is_null ()) {
5335 out <<
"null" << endl;
5342 getRowMap ()->describe (out, vl);
5347 out <<
"Column Map: ";
5349 if (getColMap ().is_null ()) {
5351 out <<
"null" << endl;
5353 }
else if (getColMap () == getRowMap ()) {
5355 out <<
"same as row Map" << endl;
5361 getColMap ()->describe (out, vl);
5366 out <<
"Domain Map: ";
5368 if (getDomainMap ().is_null ()) {
5370 out <<
"null" << endl;
5372 }
else if (getDomainMap () == getRowMap ()) {
5374 out <<
"same as row Map" << endl;
5376 }
else if (getDomainMap () == getColMap ()) {
5378 out <<
"same as column Map" << endl;
5384 getDomainMap ()->describe (out, vl);
5389 out <<
"Range Map: ";
5391 if (getRangeMap ().is_null ()) {
5393 out <<
"null" << endl;
5395 }
else if (getRangeMap () == getDomainMap ()) {
5397 out <<
"same as domain Map" << endl;
5399 }
else if (getRangeMap () == getRowMap ()) {
5401 out <<
"same as row Map" << endl;
5407 getRangeMap ()->describe (out, vl);
// Per-process summary: loop over all ranks and print only on the turn that
// matches this process's rank.
5411 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5412 if (myRank == curRank) {
5413 out <<
"Process rank: " << curRank << endl;
5414 Teuchos::OSTab tab2 (out);
5415 if (! staticGraph_->indicesAreAllocated ()) {
5416 out <<
"Graph indices not allocated" << endl;
5419 out <<
"Number of allocated entries: "
5420 << staticGraph_->getLocalAllocationSize () << endl;
5422 out <<
"Number of entries: " << getLocalNumEntries () << endl
5423 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
5432 if (vl < VERB_HIGH) {
// Per-row table, again printed one rank at a time.
5437 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5438 if (myRank == curRank) {
5439 out << std::setw(width) <<
"Proc Rank"
5440 << std::setw(width) <<
"Global Row"
5441 << std::setw(width) <<
"Num Entries";
5442 if (vl == VERB_EXTREME) {
5443 out << std::setw(width) <<
"(Index,Value)";
5446 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5447 const size_t nE = getNumEntriesInLocalRow(r);
5448 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5449 out << std::setw(width) << myRank
5450 << std::setw(width) << gid
5451 << std::setw(width) << nE;
5452 if (vl == VERB_EXTREME) {
// Globally indexed: the row view already holds global column indices.
5453 if (isGloballyIndexed()) {
5454 global_inds_host_view_type rowinds;
5455 values_host_view_type rowvals;
5456 getGlobalRowView (gid, rowinds, rowvals);
5457 for (
size_t j = 0; j < nE; ++j) {
5458 out <<
" (" << rowinds[j]
5459 <<
", " << rowvals[j]
// Locally indexed: translate each local column index through the column Map.
5463 else if (isLocallyIndexed()) {
5464 local_inds_host_view_type rowinds;
5465 values_host_view_type rowvals;
5466 getLocalRowView (r, rowinds, rowvals);
5467 for (
size_t j=0; j < nE; ++j) {
5468 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5469 <<
", " << rowvals[j]
// NOTE(review): the name, signature and most of the body are not visible in
// this excerpt; presumably this is CrsMatrix::checkSizes() -- confirm.
// The only visible statement returns true exactly when `srcRowMat` is
// non-null; how `srcRowMat` is obtained is not shown here.
5485 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5499 return (srcRowMat !=
nullptr);
// CrsMatrix::applyCrsPadding: applies `padding` to the unpacked storage of
// myGraph_ and to valuesUnpacked_wdv.
//
// Visible steps:
//   1. If the graph's indices are not allocated yet, call allocateValues().
//   2. Copy the graph's unpacked row offsets into a new view row_ptr_beg and
//      build row_ptr_end, the end offset of each row's used entries.
//   3. Run the padding step on the global- or local-index array (the callee
//      name is not visible here) and check that the values and column-index
//      arrays have the same length afterwards (std::logic_error otherwise).
//   4. If the graph tracks per-row entry counts, recompute them as
//      row_ptr_end - row_ptr_beg.
//   5. Hand row_ptr_beg back to the graph via setRowPtrsUnpacked().
// When verbose output is enabled, progress messages go to std::cerr.
5502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5506 const typename crs_graph_type::padding_type& padding,
5512 using LO = local_ordinal_type;
5513 using row_ptrs_type =
5514 typename local_graph_device_type::row_map_type::non_const_type;
5515 using range_policy =
5516 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5517 const char tfecfFuncName[] =
"applyCrsPadding";
5518 const char suffix[] =
5519 ". Please report this bug to the Tpetra developers.";
5520 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5522 std::unique_ptr<std::string> prefix;
5524 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5525 std::ostringstream os;
5526 os << *prefix <<
"padding: ";
5529 std::cerr << os.str();
// The rank is only looked up when verbose; otherwise -1 is used.
5531 const int myRank = ! verbose ? -1 : [&] () {
5532 auto map = this->getMap();
5533 if (map.is_null()) {
5536 auto comm = map->getComm();
5537 if (comm.is_null()) {
5540 return comm->getRank();
// Step 1: make sure storage exists before padding it.
5544 if (! myGraph_->indicesAreAllocated()) {
5546 std::ostringstream os;
5547 os << *prefix <<
"Call allocateIndices" << endl;
5548 std::cerr << os.str();
5550 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5562 std::ostringstream os;
5563 os << *prefix <<
"Allocate row_ptrs_beg: "
5564 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5565 std::cerr << os.str();
// Step 2a: row_ptr_beg is allocated uninitialized because it is overwritten
// immediately by the deep_copy of the graph's unpacked row offsets.
5567 using Kokkos::view_alloc;
5568 using Kokkos::WithoutInitializing;
5569 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5570 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5572 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
// N = number of rows = (number of offsets - 1), or 0 for an empty offsets array.
5574 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5575 size_t(row_ptr_beg.extent(0) - 1);
5577 std::ostringstream os;
5578 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5579 std::cerr << os.str();
5581 row_ptrs_type row_ptr_end(
5582 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5584 row_ptrs_type num_row_entries_d;
// A non-empty k_numRowEntries_ means the graph tracks per-row counts, so
// they must be used here and refreshed after padding.
5586 const bool refill_num_row_entries =
5587 myGraph_->k_numRowEntries_.extent(0) != 0;
// Step 2b: end offset = begin offset + current number of entries in the row.
5589 if (refill_num_row_entries) {
5592 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5593 myGraph_->k_numRowEntries_);
5594 Kokkos::parallel_for
5595 (
"Fill end row pointers", range_policy(0, N),
5596 KOKKOS_LAMBDA (
const size_t i) {
5597 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
// Step 2b (no per-row counts): each row ends where the next row begins.
5604 Kokkos::parallel_for
5605 (
"Fill end row pointers", range_policy(0, N),
5606 KOKKOS_LAMBDA (
const size_t i) {
5607 row_ptr_end(i) = row_ptr_beg(i+1);
// Step 3: pad the global-index array or the local-index array, together with
// the values, then verify the two arrays still have matching lengths.
5611 if (myGraph_->isGloballyIndexed()) {
5613 myGraph_->gblInds_wdv,
5614 valuesUnpacked_wdv, padding, myRank, verbose);
5615 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5616 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5617 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5618 (newValuesLen != newColIndsLen, std::logic_error,
5619 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5620 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5625 myGraph_->lclIndsUnpacked_wdv,
5626 valuesUnpacked_wdv, padding, myRank, verbose);
5627 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5628 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5630 (newValuesLen != newColIndsLen, std::logic_error,
5631 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5632 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
// Step 4: recompute the per-row entry counts from the begin/end offsets.
5636 if (refill_num_row_entries) {
5637 Kokkos::parallel_for
5638 (
"Fill num entries", range_policy(0, N),
5639 KOKKOS_LAMBDA (
const size_t i) {
5640 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5646 std::ostringstream os;
5647 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5648 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5649 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5650 std::cerr << os.str();
// The number of row offsets must be unchanged by padding.
5651 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5652 row_ptr_beg.extent(0) );
// Step 5: give the row offsets back to the graph.
5654 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// CrsMatrix::copyAndPermuteStaticGraph: copies rows of `srcMat` into this
// matrix through combineGlobalValues().
//
// Parameters (as visible in the signature):
//   srcMat          - source RowMatrix.
//   numSameIDs      - the first numSameIDs source rows are processed with
//                     target global ID equal to the source global ID.
//   permuteToLIDs   - target local row IDs for the permuted rows.
//   permuteFromLIDs - source local row IDs for the permuted rows.
//   numPermutes     - number of entries read from the two permute arrays.
//
// For each row, the data is fetched either with getGlobalRowCopy() into the
// reusable rowInds/rowVals buffers (when the source is locally indexed) or
// with getGlobalRowView() (otherwise), then wrapped in Teuchos::ArrayView.
// A mismatch between getNumEntriesInGlobalRow() and the length reported by
// getGlobalRowCopy() throws std::logic_error.
// NOTE(review): the verbose prefix is created with class name "CrsGraph"
// although this is a CrsMatrix method -- confirm whether that is intended.
5657 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5659 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5660 copyAndPermuteStaticGraph(
5661 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5662 const size_t numSameIDs,
5663 const LocalOrdinal permuteToLIDs[],
5664 const LocalOrdinal permuteFromLIDs[],
5665 const size_t numPermutes)
5667 using Details::ProfilingRegion;
5668 using Teuchos::Array;
5669 using Teuchos::ArrayView;
5671 using LO = LocalOrdinal;
5672 using GO = GlobalOrdinal;
5673 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5674 const char suffix[] =
5675 " Please report this bug to the Tpetra developers.";
5676 ProfilingRegion regionCAP
5677 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5681 std::unique_ptr<std::string> prefix;
5683 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5684 std::ostringstream os;
5685 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for callees; null when not verbose.
5687 const char*
const prefix_raw =
5688 verbose ? prefix.get()->c_str() :
nullptr;
5690 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
// Phase 1: the "same" rows.
5695 const map_type& srcRowMap = * (srcMat.getRowMap ());
5696 nonconst_global_inds_host_view_type rowInds;
5697 nonconst_values_host_view_type rowVals;
5698 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5699 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5703 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5704 const GO targetGID = sourceGID;
5706 ArrayView<const GO>rowIndsConstView;
5707 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: copy the row out in global indices. The scratch
// buffers only grow, so they are reused across rows.
5709 if (sourceIsLocallyIndexed) {
5710 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5711 if (rowLength > static_cast<size_t> (rowInds.size())) {
5712 Kokkos::resize(rowInds,rowLength);
5713 Kokkos::resize(rowVals,rowLength);
// Restrict the scratch buffers to exactly rowLength entries.
5717 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5718 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5723 size_t checkRowLength = 0;
5724 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5725 rowValsView, checkRowLength);
5727 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5728 (rowLength != checkRowLength, std::logic_error,
"For "
5729 "global row index " << sourceGID <<
", the source "
5730 "matrix's getNumEntriesInGlobalRow returns a row length "
5731 "of " << rowLength <<
", but getGlobalRowCopy reports "
5732 "a row length of " << checkRowLength <<
"." << suffix);
// Wrap the copied data in ArrayViews; values are reinterpreted as Scalar.
5739 rowIndsConstView = Teuchos::ArrayView<const GO> (
5740 rowIndsView.data(), rowIndsView.extent(0),
5741 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5742 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5743 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5744 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Otherwise: take a view of the source row.
5749 global_inds_host_view_type rowIndsView;
5750 values_host_view_type rowValsView;
5751 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5756 rowIndsConstView = Teuchos::ArrayView<const GO> (
5757 rowIndsView.data(), rowIndsView.extent(0),
5758 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5759 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5760 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5761 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Combine the source row into the target row.
5769 combineGlobalValues(targetGID, rowIndsConstView,
5771 prefix_raw, debug, verbose);
5775 std::ostringstream os;
5776 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows. Source and target global IDs come from different
// Maps (source row Map vs. this matrix's row Map).
5779 const map_type& tgtRowMap = * (this->getRowMap ());
5780 for (
size_t p = 0; p < numPermutes; ++p) {
5781 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5782 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5784 ArrayView<const GO> rowIndsConstView;
5785 ArrayView<const Scalar> rowValsConstView;
5787 if (sourceIsLocallyIndexed) {
5788 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5789 if (rowLength > static_cast<size_t> (rowInds.size ())) {
5790 Kokkos::resize(rowInds,rowLength);
5791 Kokkos::resize(rowVals,rowLength);
5795 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5796 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5801 size_t checkRowLength = 0;
5802 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5803 rowValsView, checkRowLength);
5805 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5806 (rowLength != checkRowLength, std::logic_error,
"For "
5807 "source matrix global row index " << sourceGID <<
", "
5808 "getNumEntriesInGlobalRow returns a row length of " <<
5809 rowLength <<
", but getGlobalRowCopy a row length of "
5810 << checkRowLength <<
"." << suffix);
5817 rowIndsConstView = Teuchos::ArrayView<const GO> (
5818 rowIndsView.data(), rowIndsView.extent(0),
5819 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5820 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5821 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5822 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5827 global_inds_host_view_type rowIndsView;
5828 values_host_view_type rowValsView;
5829 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5834 rowIndsConstView = Teuchos::ArrayView<const GO> (
5835 rowIndsView.data(), rowIndsView.extent(0),
5836 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5837 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5838 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5839 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5844 combineGlobalValues(targetGID, rowIndsConstView,
5846 prefix_raw, debug, verbose);
5850 std::ostringstream os;
5851 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermuteNonStaticGraph: copies rows of `srcMat` into this
// matrix through insertGlobalValuesFilteredChecked().
//
// Parameters (as visible in the signature):
//   srcMat             - source RowMatrix.
//   numSameIDs         - the first numSameIDs source rows are processed with
//                        target global ID equal to the source global ID.
//   permuteToLIDs_dv   - DualView of target local row IDs (host view is read).
//   permuteFromLIDs_dv - DualView of source local row IDs (host view is read).
//   numPermutes        - number of permuted rows processed.
//
// Before any row is copied, padding is computed by
// myGraph_->computeCrsPadding() from the source graph and applied with
// applyCrsPadding(). Row data is then fetched with getGlobalRowCopy()
// (locally indexed source) or getGlobalRowView() (otherwise). A row-length
// mismatch from getGlobalRowCopy() throws std::logic_error.
// NOTE(review): the verbose prefix is created with class name "CrsGraph"
// although this is a CrsMatrix method -- confirm whether that is intended.
5855 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5857 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5858 copyAndPermuteNonStaticGraph(
5859 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5860 const size_t numSameIDs,
5861 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5862 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5863 const size_t numPermutes)
5865 using Details::ProfilingRegion;
5866 using Teuchos::Array;
5867 using Teuchos::ArrayView;
5869 using LO = LocalOrdinal;
5870 using GO = GlobalOrdinal;
5871 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5872 const char suffix[] =
5873 " Please report this bug to the Tpetra developers.";
5874 ProfilingRegion regionCAP
5875 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5879 std::unique_ptr<std::string> prefix;
5881 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5882 std::ostringstream os;
5883 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for callees; null when not verbose.
5885 const char*
const prefix_raw =
5886 verbose ? prefix.get()->c_str() :
nullptr;
// Compute the padding from the source graph, then apply it to this matrix
// before inserting any entries.
5889 using row_graph_type = RowGraph<LO, GO, Node>;
5890 const row_graph_type& srcGraph = *(srcMat.getGraph());
5892 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5893 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5894 applyCrsPadding(*padding, verbose);
5896 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
// Phase 1: the "same" rows.
5901 const map_type& srcRowMap = * (srcMat.getRowMap ());
5902 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5903 using gids_type = nonconst_global_inds_host_view_type;
5904 using vals_type = nonconst_values_host_view_type;
5907 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5911 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5912 const GO targetGID = sourceGID;
5914 ArrayView<const GO> rowIndsConstView;
5915 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: copy the row out in global indices, growing the
// reusable scratch buffers only when the row does not fit.
5917 if (sourceIsLocallyIndexed) {
5919 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5920 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5921 Kokkos::resize(rowInds,rowLength);
5922 Kokkos::resize(rowVals,rowLength);
5926 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5927 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5932 size_t checkRowLength = 0;
5933 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5936 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5937 (rowLength != checkRowLength, std::logic_error,
": For "
5938 "global row index " << sourceGID <<
", the source "
5939 "matrix's getNumEntriesInGlobalRow returns a row length "
5940 "of " << rowLength <<
", but getGlobalRowCopy reports "
5941 "a row length of " << checkRowLength <<
"." << suffix);
5943 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5944 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
// Otherwise: take a view of the source row.
5947 global_inds_host_view_type rowIndsView;
5948 values_host_view_type rowValsView;
5949 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5955 rowIndsConstView = Teuchos::ArrayView<const GO> (
5956 rowIndsView.data(), rowIndsView.extent(0),
5957 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5958 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5959 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5960 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Insert the source row's entries into the target row.
5966 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5967 rowValsConstView, prefix_raw, debug, verbose);
5971 std::ostringstream os;
5972 os << *prefix <<
"Do permutes" << endl;
// Phase 2: permuted rows, read through the host views of the DualViews.
5974 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5975 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5977 const map_type& tgtRowMap = * (this->getRowMap ());
5978 for (
size_t p = 0; p < numPermutes; ++p) {
5979 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5980 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5982 ArrayView<const GO> rowIndsConstView;
5983 ArrayView<const Scalar> rowValsConstView;
5985 if (sourceIsLocallyIndexed) {
5986 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5987 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5988 Kokkos::resize(rowInds,rowLength);
5989 Kokkos::resize(rowVals,rowLength);
5993 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5994 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5999 size_t checkRowLength = 0;
6000 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6001 rowValsView, checkRowLength);
6003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6004 (rowLength != checkRowLength, std::logic_error,
"For "
6005 "source matrix global row index " << sourceGID <<
", "
6006 "getNumEntriesInGlobalRow returns a row length of " <<
6007 rowLength <<
", but getGlobalRowCopy a row length of "
6008 << checkRowLength <<
"." << suffix);
6010 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6011 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6014 global_inds_host_view_type rowIndsView;
6015 values_host_view_type rowValsView;
6016 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6022 rowIndsConstView = Teuchos::ArrayView<const GO> (
6023 rowIndsView.data(), rowIndsView.extent(0),
6024 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6025 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6026 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6027 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6033 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6034 rowValsConstView, prefix_raw, debug, verbose);
6038 std::ostringstream os;
6039 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermute: entry point that validates the permute lists
// and dispatches to the static-graph or non-static-graph implementation.
//
// Visible behaviour:
//   - throws std::invalid_argument when permuteToLIDs and permuteFromLIDs
//     have different lengths;
//   - casts the source object to RMT with a reference dynamic_cast (which
//     throws std::bad_cast on failure);
//   - static graph: asserts that neither DualView needs a host sync, then
//     passes raw host pointers to copyAndPermuteStaticGraph();
//   - otherwise: passes the DualViews to copyAndPermuteNonStaticGraph().
// When verbose output is enabled, progress messages go to std::cerr.
6043 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6048 const size_t numSameIDs,
6049 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6050 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6059 const char tfecfFuncName[] =
"copyAndPermute: ";
6060 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6062 const bool verbose = Behavior::verbose(
"CrsMatrix");
6063 std::unique_ptr<std::string> prefix;
6065 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6066 std::ostringstream os;
6067 os << *prefix << endl
6068 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6069 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6078 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6080 std::cerr << os.str ();
// The two permute lists must pair up one-to-one.
6083 const auto numPermute = permuteToLIDs.extent (0);
6084 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6085 (numPermute != permuteFromLIDs.extent (0),
6086 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6087 << numPermute <<
"!= permuteFromLIDs.extent(0) = "
6088 << permuteFromLIDs.extent (0) <<
".");
6093 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
// Static graph: the helper takes raw host arrays, so the host copies of the
// DualViews must already be current.
6094 if (isStaticGraph ()) {
6095 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6096 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6097 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6098 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6100 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6101 permuteToLIDs_h.data(),
6102 permuteFromLIDs_h.data(),
// Non-static graph: the helper takes the DualViews directly.
6106 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6107 permuteFromLIDs, numPermute);
6111 std::ostringstream os;
6112 os << *prefix <<
"Done" << endl;
6113 std::cerr << os.str();
// CrsMatrix::packAndPrepare: packs the rows listed in `exportLIDs` of the
// source object into `exports`, filling `numPacketsPerLID` and
// `constantNumPackets`.
//
// Two paths are visible:
//   - the source is a CrsMatrix with the same template parameters: delegate
//     to its packNew();
//   - otherwise the source must be a row_matrix_type (std::invalid_argument
//     if not): call its pack() into a temporary Teuchos::Array<char>, grow
//     `exports` if it is too small, and wrap the packed bytes in an
//     unmanaged host View (presumably the source of a copy into the host
//     view of `exports`; the copy itself is not visible here).
// Exceptions thrown by packNew()/pack() are caught and their text is stored
// in `msg`, tagged with this process's rank. Afterwards either a global
// reduceAll decides whether to throw, or the local flag does; both throw
// std::logic_error.
//
// Fix: `myRank` was initialized from comm.getSize(), so every "Proc N: ..."
// error message reported the communicator size instead of the rank of the
// failing process. It now uses comm.getRank().
6117 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6122 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6123 Kokkos::DualView<char*, buffer_device_type>& exports,
6124 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6125 size_t& constantNumPackets)
6130 using Teuchos::outArg;
6131 using Teuchos::REDUCE_MAX;
6132 using Teuchos::reduceAll;
6134 typedef LocalOrdinal LO;
6135 typedef GlobalOrdinal GO;
6136 const char tfecfFuncName[] =
"packAndPrepare: ";
6137 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6139 const bool debug = Behavior::debug(
"CrsMatrix");
6140 const bool verbose = Behavior::verbose(
"CrsMatrix");
6143 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6144 if (pComm.is_null ()) {
6147 const Teuchos::Comm<int>& comm = *pComm;
// Rank of this process; used only to tag error messages below.
6148 const int myRank = comm.getRank ();
6150 std::unique_ptr<std::string> prefix;
6152 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6153 std::ostringstream os;
6154 os << *prefix <<
"Start" << endl
6164 std::cerr << os.str ();
// Collects this process's error text from any caught exception.
6187 std::ostringstream msg;
// Path 1: same-type CrsMatrix source -> packNew().
6190 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6191 const crs_matrix_type* srcCrsMat =
6192 dynamic_cast<const crs_matrix_type*
> (&source);
6193 if (srcCrsMat !=
nullptr) {
6195 std::ostringstream os;
6196 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6197 "calling packNew" << endl;
6198 std::cerr << os.str ();
6201 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6202 constantNumPackets);
6204 catch (std::exception& e) {
6206 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Path 2: generic RowMatrix source -> pack() through Teuchos array types.
6210 using Kokkos::HostSpace;
6211 using Kokkos::subview;
6212 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6213 using range_type = Kokkos::pair<size_t, size_t>;
6216 std::ostringstream os;
6217 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6219 std::cerr << os.str ();
6222 const row_matrix_type* srcRowMat =
6223 dynamic_cast<const row_matrix_type*
> (&source);
6224 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6225 (srcRowMat ==
nullptr, std::invalid_argument,
6226 "The source object of the Import or Export operation is neither a "
6227 "CrsMatrix (with the same template parameters as the target object), "
6228 "nor a RowMatrix (with the same first four template parameters as the "
// pack() reads the export LIDs on the host, so the host copy must be current.
6239 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6240 auto exportLIDs_h = exportLIDs.view_host ();
6241 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6242 exportLIDs_h.size ());
// Temporary buffer that pack() fills.
6246 Teuchos::Array<char> exports_a;
// numPacketsPerLID is written on the host: reset its sync state and mark the
// host side as modified.
6252 numPacketsPerLID.clear_sync_state ();
6253 numPacketsPerLID.modify_host ();
6254 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6255 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6256 numPacketsPerLID_h.size ());
6261 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6262 constantNumPackets);
6264 catch (std::exception& e) {
6266 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Grow `exports` only if it cannot hold the packed data; keep its label, or
// use "exports" when the old label is empty.
6270 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6271 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6272 const std::string oldLabel = exports.d_view.label ();
6273 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6274 exports = exports_type (newLabel, newAllocSize);
6279 exports.modify_host();
6281 auto exports_h = exports.view_host ();
6282 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
// Unmanaged host View over the packed bytes in exports_a.
6286 typedef typename exports_type::t_host::execution_space HES;
6287 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6288 Kokkos::View<const char*, host_device_type>
6289 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
// Global error check: the max of lclBad over all processes goes into gblBad.
6296 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6299 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6300 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6301 "one or more participating processes.");
// Local error check: throws with this process's own message.
6305 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6306 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6307 "or more participating processes. Here is this process' error "
6308 "message: " << msg.str ());
6312 std::ostringstream os;
6313 os << *prefix <<
"packAndPrepare: Done!" << endl
6323 std::cerr << os.str ();
// CrsMatrix::packRow: packs one row into `exports`, starting at byte `offset`.
//
// Parameters (as visible in the signature):
//   exports          - output byte buffer.
//   offset           - byte offset in `exports` where this row starts.
//   numEnt           - number of entries in the row.
//   gidsIn           - global column indices of the row (numEnt of them).
//   valsIn           - values of the row (numEnt of them).
//   numBytesPerValue - packed size of one value.
//
// Layout, from the offsets computed below:
//   [ entry count : numEntLen bytes ][ indices : gidsLen ][ values : valsLen ]
// Throws std::logic_error if the number of bytes written differs from
// numEntLen + gidsLen + valsLen, and std::runtime_error if any packArray
// call reported a nonzero error code.
// NOTE(review): the return type, the numEntLen/gid/errorCode declarations
// and the packing of the entry count are not visible in this excerpt.
6327 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6329 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6330 packRow (
char exports[],
6331 const size_t offset,
6332 const size_t numEnt,
6333 const GlobalOrdinal gidsIn[],
6334 const impl_scalar_type valsIn[],
6335 const size_t numBytesPerValue)
const
6338 using Kokkos::subview;
6340 typedef LocalOrdinal LO;
6341 typedef GlobalOrdinal GO;
6342 typedef impl_scalar_type ST;
// The entry count is stored as an LO.
6350 const LO numEntLO =
static_cast<size_t> (numEnt);
// Byte offsets and lengths of the three sections of the packed row.
6352 const size_t numEntBeg = offset;
6354 const size_t gidsBeg = numEntBeg + numEntLen;
6355 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6356 const size_t valsBeg = gidsBeg + gidsLen;
6357 const size_t valsLen = numEnt * numBytesPerValue;
6359 char*
const numEntOut = exports + numEntBeg;
6360 char*
const gidsOut = exports + gidsBeg;
6361 char*
const valsOut = exports + valsBeg;
// Running total of bytes written; checked against the expected size below.
6363 size_t numBytesOut = 0;
// packArray returns (error code, bytes written); accumulate both.
6368 Kokkos::pair<int, size_t> p;
6369 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6370 errorCode += p.first;
6371 numBytesOut += p.second;
6373 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6374 errorCode += p.first;
6375 numBytesOut += p.second;
6378 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6379 TEUCHOS_TEST_FOR_EXCEPTION
6380 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6381 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6382 << expectedNumBytes <<
".");
6383 TEUCHOS_TEST_FOR_EXCEPTION
6384 (errorCode != 0, std::runtime_error,
"packRow: "
6385 "PackTraits::packArray returned a nonzero error code");
// CrsMatrix::unpackRow: unpacks one row from `imports`, starting at byte
// `offset`, into gidsOut/valsOut.
//
// Parameters (as visible in the signature):
//   gidsOut          - output global column indices.
//   valsOut          - output values.
//   imports          - input byte buffer.
//   offset           - byte offset in `imports` where this row starts.
//   numBytes         - number of bytes this row occupies in `imports`.
//   numEnt           - number of entries expected (from numPacketsPerLID,
//                      per the error messages below).
//   numBytesPerValue - packed size of one value.
//
// The buffer layout mirrors the offsets computed below:
//   [ entry count : numEntLen bytes ][ indices : gidsLen ][ values : valsLen ]
// Consistency failures (numBytes vs. numEnt disagreement, unpacked entry
// count != numEnt or == 0, byte-count mismatches) throw std::logic_error; a
// nonzero unpackArray error code throws std::runtime_error.
// NOTE(review): the return type, the numEntLen/gid/numEntOut/errorCode
// declarations and the unpacking of the entry count are not visible in this
// excerpt.
6390 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6392 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6393 unpackRow (GlobalOrdinal gidsOut[],
6394 impl_scalar_type valsOut[],
6395 const char imports[],
6396 const size_t offset,
6397 const size_t numBytes,
6398 const size_t numEnt,
6399 const size_t numBytesPerValue)
6402 using Kokkos::subview;
6404 typedef LocalOrdinal LO;
6405 typedef GlobalOrdinal GO;
6406 typedef impl_scalar_type ST;
6408 Details::ProfilingRegion region_upack_row(
6409 "Tpetra::CrsMatrix::unpackRow",
// Zero bytes: the error below reports a nonzero numEnt for such a row (the
// guarding condition is not visible in this excerpt).
6413 if (numBytes == 0) {
6416 const int myRank = this->getMap ()->getComm ()->getRank ();
6417 TEUCHOS_TEST_FOR_EXCEPTION
6418 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6419 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6420 "number of entries to unpack (as reported by numPacketsPerLID) "
6421 "for this row numEnt=" << numEnt <<
" != 0.");
// Nonzero bytes but zero entries is likewise inconsistent.
6426 if (numEnt == 0 && numBytes != 0) {
6427 const int myRank = this->getMap ()->getComm ()->getRank ();
6428 TEUCHOS_TEST_FOR_EXCEPTION
6429 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6430 "unpackRow: The number of entries to unpack (as reported by "
6431 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6432 "numBytes=" << numBytes <<
" != 0.");
// Byte offsets and lengths of the three sections of the packed row.
6438 const size_t numEntBeg = offset;
6440 const size_t gidsBeg = numEntBeg + numEntLen;
6441 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6442 const size_t valsBeg = gidsBeg + gidsLen;
6443 const size_t valsLen = numEnt * numBytesPerValue;
6445 const char*
const numEntIn = imports + numEntBeg;
6446 const char*
const gidsIn = imports + gidsBeg;
6447 const char*
const valsIn = imports + valsBeg;
// Running total of bytes consumed; checked against numBytes below.
6449 size_t numBytesOut = 0;
// The entry count read from the buffer must equal numEnt and be nonzero.
// Both problems are reported in one message when both occur.
6453 if (static_cast<size_t> (numEntOut) != numEnt ||
6454 numEntOut == static_cast<LO> (0)) {
6455 const int myRank = this->getMap ()->getComm ()->getRank ();
6456 std::ostringstream os;
6457 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6458 bool firstErrorCondition =
false;
6459 if (static_cast<size_t> (numEntOut) != numEnt) {
6460 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6461 <<
" does not equal number of entries unpacked from imports "
6462 "buffer numEntOut=" << numEntOut <<
".";
6463 firstErrorCondition =
true;
6465 if (numEntOut == static_cast<LO> (0)) {
6466 if (firstErrorCondition) {
6469 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6470 "but number of bytes to unpack for this row numBytes=" << numBytes
6471 <<
" != 0. This should never happen, since packRow should only "
6472 "ever pack rows with a nonzero number of entries. In this case, "
6473 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6476 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
// unpackArray returns (error code, bytes read); accumulate both.
6480 Kokkos::pair<int, size_t> p;
6481 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6482 errorCode += p.first;
6483 numBytesOut += p.second;
6485 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6486 errorCode += p.first;
6487 numBytesOut += p.second;
// Bytes consumed must match both the caller's numBytes and the size implied
// by the layout.
6490 TEUCHOS_TEST_FOR_EXCEPTION
6491 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6492 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6494 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6495 TEUCHOS_TEST_FOR_EXCEPTION
6496 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6497 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6498 << expectedNumBytes <<
".");
6500 TEUCHOS_TEST_FOR_EXCEPTION
6501 (errorCode != 0, std::runtime_error,
"unpackRow: "
6502 "PackTraits::unpackArray returned a nonzero error code");
// Make sure the `exports` pack buffer is large enough for the rows listed in
// exportLIDs, and report how many entries those rows hold in total.
//
// exports         - pack buffer; replaced by a larger DualView only when its
//                   current extent is smaller than the computed allocSize.
// totalNumEntries - output; reset to 0 and then accumulated from
//                   getNumEntriesInLocalRow over the exported rows.
// exportLIDs      - local row indices to export; must not need a host sync
//                   (enforced by TEUCHOS_ASSERT), since it is read on host.
6507 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6509 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6510 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6511 size_t& totalNumEntries,
6512 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6514 using Details::Behavior;
6517 typedef impl_scalar_type IST;
6518 typedef LocalOrdinal LO;
6519 typedef GlobalOrdinal GO;
6525 const bool verbose = Behavior::verbose(
"CrsMatrix");
6526 std::unique_ptr<std::string> prefix;
6528 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6529 std::ostringstream os;
6530 os << *prefix <<
"Before:"
6538 std::cerr << os.str ();
6543 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6545 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6546 auto exportLIDs_h = exportLIDs.view_host ();
// Count the entries in all rows to be exported.
6549 totalNumEntries = 0;
6550 for (LO i = 0; i < numExportLIDs; ++i) {
6551 const LO lclRow = exportLIDs_h[i];
6552 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
// NOTE(review): rows for which the count is the size_t "invalid" sentinel
// are special-cased before being added to the total -- confirm the handling.
6555 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6558 totalNumEntries += curNumEntries;
// Buffer size estimate: one LO per exported row plus one (value, global
// index) pair per entry.
6569 const size_t allocSize =
6570 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6571 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
// Only grow the buffer; keep its existing label, or use "exports" if the
// old label is empty.
6572 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
6573 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6575 const std::string oldLabel = exports.d_view.label ();
6576 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6577 exports = exports_type (newLabel, allocSize);
6581 std::ostringstream os;
6582 os << *prefix <<
"After:"
6590 std::cerr << os.str ();
// Pack the rows listed in exportLIDs into `exports`, choosing the packing
// routine according to whether the matrix has a static graph.
//
// When the graph is not static, the work is forwarded unchanged to
// packNonStaticNew.
// NOTE(review): the routine used on the static-graph path also receives
// constantNumPackets; confirm which packer it is.
6594 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6597 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6598 Kokkos::DualView<char*, buffer_device_type>& exports,
6599 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6600 size_t& constantNumPackets)
const
6604 if (this->isStaticGraph ()) {
6607 constantNumPackets);
6610 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6611 constantNumPackets);
// Pack the rows listed in exportLIDs into the `exports` buffer, working on
// host views.
//
// exportLIDs         - local indices of the rows to pack.
// exports            - pack buffer; sized via allocatePackSpaceNew and marked
//                      as modified on host.
// numPacketsPerLID   - per-row output: the number of bytes packed for that row
//                      (0 is also written for some rows, see below).
// constantNumPackets - output; always set to 0 here.
//
// Throws std::invalid_argument when exportLIDs and numPacketsPerLID have
// different lengths, and std::logic_error when a row's packed data would fall
// outside the `exports` buffer.
6615 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6618 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6619 Kokkos::DualView<char*, buffer_device_type>& exports,
6620 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6621 size_t& constantNumPackets)
const
6629 using LO = LocalOrdinal;
6630 using GO = GlobalOrdinal;
6631 using ST = impl_scalar_type;
6632 const char tfecfFuncName[] =
"packNonStaticNew: ";
6634 const bool verbose = Behavior::verbose(
"CrsMatrix");
6635 std::unique_ptr<std::string> prefix;
6637 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6638 std::ostringstream os;
6639 os << *prefix <<
"Start" << endl;
6640 std::cerr << os.str ();
6643 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6644 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6645 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
6646 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6647 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6653 constantNumPackets = 0;
// Size the buffer for all exported rows before packing anything.
6658 size_t totalNumEntries = 0;
6659 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6660 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
// The buffer is about to be overwritten on host: reset its sync state and
// flag the host copy as modified.
6663 exports.clear_sync_state();
6664 exports.modify_host();
6665 auto exports_h = exports.view_host ();
6667 std::ostringstream os;
6668 os << *prefix <<
"After marking exports as modified on host, "
6670 std::cerr << os.str ();
6674 auto exportLIDs_h = exportLIDs.view_host ();
// numPacketsPerLID is passed by const reference but is an output of this
// method, hence the const_cast to update its sync/modify flags.
6677 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6678 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6679 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6684 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
// Scratch storage for global column indices; only set up when the matrix is
// locally indexed, because then indices must be converted row by row.
6688 typename global_inds_host_view_type::non_const_type gidsIn_k;
6689 if (this->isLocallyIndexed()) {
6691 typename global_inds_host_view_type::non_const_type(
"packGids",
6696 for (
size_t i = 0; i < numExportLIDs; ++i) {
6697 const LO lclRow = exportLIDs_h[i];
6699 size_t numBytes = 0;
6700 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
// NOTE(review): presumably the empty-row case -- confirm the guarding
// condition for this zero assignment.
6707 numPacketsPerLID_h[i] = 0;
6711 if (this->isLocallyIndexed ()) {
6712 typename global_inds_host_view_type::non_const_type gidsIn;
6713 values_host_view_type valsIn;
// Translate the row's local column indices to global ones through the
// column Map, then pack from the scratch array.
6717 local_inds_host_view_type lidsIn;
6718 this->getLocalRowView (lclRow, lidsIn, valsIn);
6719 const map_type& colMap = * (this->getColMap ());
6720 for (
size_t k = 0; k < numEnt; ++k) {
6721 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6723 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
6725 const size_t numBytesPerValue =
6726 PackTraits<ST>::packValueCount (valsIn[0]);
6727 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6728 gidsIn.data (), valsIn.data (),
6731 else if (this->isGloballyIndexed ()) {
6732 global_inds_host_view_type gidsIn;
6733 values_host_view_type valsIn;
// Globally indexed: the row view already holds global column indices.
6739 const map_type& rowMap = * (this->getRowMap ());
6740 const GO gblRow = rowMap.getGlobalElement (lclRow);
6741 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6743 const size_t numBytesPerValue =
6744 PackTraits<ST>::packValueCount (valsIn[0]);
6745 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6746 gidsIn.data (), valsIn.data (),
// The packed row must lie entirely inside the buffer.
6753 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6754 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6755 "First invalid offset into 'exports' pack buffer at index i = " << i
6756 <<
".  exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6757 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
// The per-row "packet count" is the number of bytes packed for the row.
6762 numPacketsPerLID_h[i] = numBytes;
6767 std::ostringstream os;
6768 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6775 std::cerr << os.str ();
// Raw-pointer front end for combineGlobalValues.
//
// Converts the local row index to a global one via the graph's row Map, wraps
// the raw `cols` / `vals` arrays (numEnt entries each) in Teuchos::ArrayView
// objects, and forwards to combineGlobalValues together with the combine mode
// and the prefix / debug / verbose arguments.
//
// When numEnt is 0 the views are built from nullptr rather than from the
// caller's pointers.  `vals` is reinterpreted from impl_scalar_type to Scalar.
6779 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6781 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6782 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6783 const LocalOrdinal numEnt,
6784 const impl_scalar_type vals[],
6785 const GlobalOrdinal cols[],
6787 const char*
const prefix,
6791 using GO = GlobalOrdinal;
6795 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
6796 Teuchos::ArrayView<const GO> cols_av
6797 (numEnt == 0 ?
nullptr : cols, numEnt);
6798 Teuchos::ArrayView<const Scalar> vals_av
6799 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
6804 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6805 prefix, debug, verbose);
// Combine incoming (column index, value) pairs into global row
// `globalRowIndex` according to `combineMode`.
//
// With a static graph:
//   ADD     -> sumIntoGlobalValues
//   REPLACE -> replaceGlobalValues
//   ABSMAX  -> transformGlobalValues with Details::AbsMax
//   INSERT  -> std::invalid_argument (forbidden for a static graph)
//   other   -> std::logic_error
//
// In the remaining branch (a dynamic graph, judging by the error messages):
//   ADD or INSERT -> insertGlobalValuesFilteredChecked
//   ABSMAX        -> std::logic_error
//   REPLACE       -> std::logic_error
//   other         -> std::logic_error
6809 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6811 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6812 combineGlobalValues(
6813 const GlobalOrdinal globalRowIndex,
6814 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6815 const Teuchos::ArrayView<const Scalar>& values,
6817 const char*
const prefix,
6821 const char tfecfFuncName[] =
"combineGlobalValues: ";
6823 if (isStaticGraph ()) {
6827 if (combineMode ==
ADD) {
6828 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6830 else if (combineMode ==
REPLACE) {
6831 replaceGlobalValues (globalRowIndex, columnIndices, values);
6833 else if (combineMode ==
ABSMAX) {
6834 using ::Tpetra::Details::AbsMax;
6836 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6840 else if (combineMode ==
INSERT) {
// The test expression repeats conditions the enclosing branches already
// established, so this check always fires when reached.
6841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6842 (isStaticGraph() && combineMode ==
INSERT,
6843 std::invalid_argument,
"INSERT combine mode is forbidden "
6844 "if the matrix has a static (const) graph (i.e., was "
6845 "constructed with the CrsMatrix constructor that takes a "
6846 "const CrsGraph pointer).");
6849 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6850 (
true, std::logic_error,
"Invalid combine mode; should "
6852 "Please report this bug to the Tpetra developers.");
// ADD and INSERT are handled identically here.
6856 if (combineMode ==
ADD || combineMode ==
INSERT) {
6863 insertGlobalValuesFilteredChecked(globalRowIndex,
6864 columnIndices, values, prefix, debug, verbose);
6875 else if (combineMode ==
ABSMAX) {
6876 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6877 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6878 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6881 else if (combineMode ==
REPLACE) {
6882 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6883 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6884 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6888 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6889 true, std::logic_error,
"Should never get here!  Please report this "
6890 "bug to the Tpetra developers.");
// Validate the arguments of an unpack-and-combine request and delegate the
// actual work to unpackAndCombineImpl.
//
// Throws std::invalid_argument when combineMode is not one of the valid modes
// (named ADD, REPLACE, ABSMAX, INSERT, ZERO in the message) or when importLIDs
// and numPacketsPerLID have different lengths.
//
// One code path wraps unpackAndCombineImpl in a try/catch, combines a local
// failure flag across the communicator with a REDUCE_MAX all-reduce, gathers
// the per-process messages, and throws std::logic_error; the other path calls
// unpackAndCombineImpl directly.
// NOTE(review): the collective path is presumably the debug-mode one (see the
// `debug` flag below) -- confirm.
6895 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6899 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6900 Kokkos::DualView<char*, buffer_device_type> imports,
6901 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6902 const size_t constantNumPackets,
6909 const char tfecfFuncName[] =
"unpackAndCombine: ";
6910 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6912 const bool debug = Behavior::debug(
"CrsMatrix");
6913 const bool verbose = Behavior::verbose(
"CrsMatrix");
6914 constexpr
int numValidModes = 5;
// Human-readable names used only to build the "invalid combine mode" message.
6917 const char* validModeNames[numValidModes] =
6918 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6920 std::unique_ptr<std::string> prefix;
6922 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6923 std::ostringstream os;
6924 os << *prefix <<
"Start:" << endl
6934 << *prefix <<
"  constantNumPackets: " << constantNumPackets
6938 std::cerr << os.str ();
// Reject any combine mode that is not in the list of valid modes.
6942 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6943 validModes+numValidModes) {
6944 std::ostringstream os;
6945 os <<
"Invalid combine mode.  Valid modes are {";
6946 for (
int k = 0; k < numValidModes; ++k) {
6947 os << validModeNames[k];
6948 if (k < numValidModes - 1) {
6953 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6954 (
true, std::invalid_argument, os.str ());
6956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6957 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6958 std::invalid_argument,
"importLIDs.extent(0)="
6959 << importLIDs.extent(0)
6960 <<
" != numPacketsPerLID.extent(0)="
6961 << numPacketsPerLID.extent(0) <<
".");
// NOTE(review): ZERO mode is special-cased here; presumably nothing needs to
// be unpacked -- confirm.
6964 if (combineMode ==
ZERO) {
6969 using Teuchos::reduceAll;
6970 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6973 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6974 constantNumPackets, combineMode,
6976 }
catch (std::exception& e) {
// Agree across all processes on whether any of them failed.
6981 const Teuchos::Comm<int>& comm = * (this->getComm ());
6982 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6983 lclBad, Teuchos::outArg (gblBad));
// Collect every process's message into one stream before throwing.
6989 std::ostringstream os;
6990 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6991 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6992 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
6993 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6994 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6995 "threw an exception on one or more participating processes: "
6996 << endl << msg->str ());
7000 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7001 constantNumPackets, combineMode,
7006 std::ostringstream os;
7007 os << *prefix <<
"Done!" << endl
7017 std::cerr << os.str ();
// Unpack received row data and combine it into this matrix.
//
// Static graph: delegates to Details::unpackCrsMatrixAndCombineNew.
// Otherwise: asks the graph to compute the padding needed for the incoming
// entries (computePaddingForCrsMatrixUnpack), applies it with applyCrsPadding,
// and then calls unpackAndCombineImplNonStatic.
//
// An exception from computePaddingForCrsMatrixUnpack is rethrown as
// std::runtime_error, prefixed with the calling process's rank (-1 when no
// communicator is available).
7021 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7025 const Kokkos::DualView<
const local_ordinal_type*,
7026 buffer_device_type>& importLIDs,
7027 Kokkos::DualView<char*, buffer_device_type> imports,
7028 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7029 const size_t constantNumPackets,
7034 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7038 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7039 std::unique_ptr<std::string> prefix;
7041 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7042 std::ostringstream os;
7043 os << *prefix <<
"isStaticGraph(): "
7044 << (isStaticGraph() ?
"true" :
"false")
7045 <<
", importLIDs.extent(0): "
7046 << importLIDs.extent(0)
7047 <<
", imports.extent(0): "
7048 << imports.extent(0)
7049 <<
", numPacketsPerLID.extent(0): "
7050 << numPacketsPerLID.extent(0)
7052 std::cerr << os.str();
7055 if (isStaticGraph ()) {
7056 using Details::unpackCrsMatrixAndCombineNew;
7057 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7058 importLIDs, constantNumPackets,
7063 using padding_type =
typename crs_graph_type::padding_type;
7064 std::unique_ptr<padding_type> padding;
7066 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7067 importLIDs, imports, numPacketsPerLID, verbose);
7069 catch (std::exception& e) {
// Look up the rank defensively: the row Map or its communicator may be null.
7070 const auto rowMap = getRowMap();
7071 const auto comm = rowMap.is_null() ? Teuchos::null :
7073 const int myRank = comm.is_null() ? -1 : comm->getRank();
7074 TEUCHOS_TEST_FOR_EXCEPTION
7075 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7076 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7077 "threw an exception: " << e.what());
7080 std::ostringstream os;
7081 os << *prefix <<
"Call applyCrsPadding" << endl;
7082 std::cerr << os.str();
7084 applyCrsPadding(*padding, verbose);
7087 std::ostringstream os;
7088 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7089 std::cerr << os.str();
7091 unpackAndCombineImplNonStatic(importLIDs, imports,
7098 std::ostringstream os;
7099 os << *prefix <<
"Done" << endl;
7100 std::cerr << os.str();
// Host-side unpack-and-combine, working row by row through unpackRow and
// combineGlobalValuesRaw.
//
// The work is done in two passes over the import rows:
//   1. read each nonempty row's entry count from the buffer, validate byte
//      counts against the buffer size, and record the largest entry count;
//   2. unpack each nonempty row into scratch arrays sized for that largest
//      count and combine it into the matrix.
//
// `imports` and `numPacketsPerLID` are synced to host if needed; importLIDs
// must already be valid on host (TEUCHOS_ASSERT).  numPacketsPerLID holds the
// number of bytes per row.  Validation failures throw std::logic_error.
7104 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7106 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7107 unpackAndCombineImplNonStatic(
7108 const Kokkos::DualView<
const local_ordinal_type*,
7109 buffer_device_type>& importLIDs,
7110 Kokkos::DualView<char*, buffer_device_type> imports,
7111 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7112 const size_t constantNumPackets,
7116 using Kokkos::subview;
7117 using Kokkos::MemoryUnmanaged;
7118 using Details::Behavior;
7121 using Details::PackTraits;
7122 using Details::ScalarViewTraits;
7124 using LO = LocalOrdinal;
7125 using GO = GlobalOrdinal;
7126 using ST = impl_scalar_type;
7127 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7129 typename View<int*, device_type>::HostMirror::execution_space;
7130 using pair_type = std::pair<typename View<int*, HES>::size_type,
7131 typename View<int*, HES>::size_type>;
7132 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7133 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7134 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7136 const bool debug = Behavior::debug(
"CrsMatrix");
7137 const bool verbose = Behavior::verbose(
"CrsMatrix");
7138 std::unique_ptr<std::string> prefix;
7140 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7141 std::ostringstream os;
7142 os << *prefix << endl;
7143 std::cerr << os.str ();
// Only pass a prefix string down to callees when verbose output is enabled.
7145 const char*
const prefix_raw =
7146 verbose ? prefix.get()->c_str() :
nullptr;
7148 const size_type numImportLIDs = importLIDs.extent (0);
// NOTE(review): ZERO mode or an empty import list is special-cased here;
// presumably there is nothing to do -- confirm.
7149 if (combineMode ==
ZERO || numImportLIDs == 0) {
7153 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7154 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// Everything below reads the buffers on host.
7159 if (imports.need_sync_host()) {
7160 imports.sync_host ();
7162 auto imports_h = imports.view_host();
7165 if (numPacketsPerLID.need_sync_host()) {
7166 numPacketsPerLID.sync_host ();
7168 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7170 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7171 auto importLIDs_h = importLIDs.view_host();
7173 size_t numBytesPerValue;
7184 numBytesPerValue = PackTraits<ST>::packValueCount (val);
// Pass 1: validate each nonempty row and find the largest entry count, so
// the scratch arrays can be allocated once.
7189 size_t maxRowNumEnt = 0;
7190 for (size_type i = 0; i < numImportLIDs; ++i) {
7191 const size_t numBytes = numPacketsPerLID_h[i];
7192 if (numBytes == 0) {
7197 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7198 (offset + numBytes >
size_t(imports_h.extent (0)),
7199 std::logic_error,
": At local row index importLIDs_h[i="
7200 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7201 <<
") + numBytes (=" << numBytes <<
") > "
7202 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7207 const size_t theNumBytes =
7209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7210 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7211 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7213 const char*
const inBuf = imports_h.data () + offset;
7214 const size_t actualNumBytes =
7218 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7219 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7220 <<
", actualNumBytes=" << actualNumBytes
7221 <<
" > numBytes=" << numBytes <<
".");
7222 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7223 (numEntLO == 0, std::logic_error,
": At local row index "
7224 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7225 "the number of entries read from the packed data is "
7226 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7230 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch arrays, each sized to hold the longest incoming row.
7238 View<GO*, HES> gblColInds;
7239 View<LO*, HES> lclColInds;
7240 View<ST*, HES> vals;
7253 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7254 gid, maxRowNumEnt,
"gids");
7255 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7256 lid, maxRowNumEnt,
"lids");
7257 vals = ScalarViewTraits<ST, HES>::allocateArray(
7258 val, maxRowNumEnt,
"vals");
// Pass 2: unpack each nonempty row into the scratch arrays and combine it.
7262 for (size_type i = 0; i < numImportLIDs; ++i) {
7263 const size_t numBytes = numPacketsPerLID_h[i];
7264 if (numBytes == 0) {
7268 const char*
const inBuf = imports_h.data () + offset;
7271 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7272 const LO lclRow = importLIDs_h[i];
// Unmanaged subviews over the first numEnt slots of the scratch arrays.
7274 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7275 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7277 const size_t numBytesOut =
7278 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7279 offset, numBytes, numEnt, numBytesPerValue);
7280 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7281 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7282 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7283 << numBytesOut <<
".");
7285 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7286 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7287 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7288 combineMode, prefix_raw, debug, verbose);
7294 std::ostringstream os;
7295 os << *prefix <<
"Done" << endl;
7296 std::cerr << os.str();
// Provide a MultiVector distributed over this matrix's column Map, reusing
// the cached importMV_ when possible.
//
// A vector is set up only when the graph has an Import object or `force` is
// true.  In that case the cached importMV_ is reused if it exists and has the
// requested number of columns; otherwise a new MultiVector on the column Map
// is allocated and stored in importMV_.
//
// Throws std::runtime_error if the matrix has no column Map or if its graph
// is not fill complete.
7300 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7301 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7304 const bool force)
const
7306 using Teuchos::null;
7310 TEUCHOS_TEST_FOR_EXCEPTION(
7311 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7312 "MapMultiVector: You may only call this method if the matrix has a "
7313 "column Map.  If the matrix does not yet have a column Map, you should "
7314 "first call fillComplete (with domain and range Map if necessary).");
7318 TEUCHOS_TEST_FOR_EXCEPTION(
7319 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7320 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7321 "this matrix's graph is fill complete.");
7324 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7325 RCP<const map_type> colMap = this->getColMap ();
7338 if (! importer.is_null () || force) {
// Reallocate when there is no cached vector or its column count differs.
7339 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7340 X_colMap = rcp (
new MV (colMap, numVecs));
7343 importMV_ = X_colMap;
7346 X_colMap = importMV_;
// Provide a MultiVector distributed over this matrix's row Map, reusing the
// cached exportMV_ when possible.
//
// A vector is set up only when the graph has an Export object or `force` is
// true.  In that case the cached exportMV_ is reused if it exists and has the
// requested number of columns; otherwise a new MultiVector on the row Map is
// allocated and stored in exportMV_.
//
// Throws std::runtime_error if the matrix's graph is not fill complete.
7357 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7358 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7361 const bool force)
const
7363 using Teuchos::null;
7369 TEUCHOS_TEST_FOR_EXCEPTION(
7370 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7371 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7372 "matrix's graph is fill complete.");
7375 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7379 RCP<const map_type> rowMap = this->getRowMap ();
7391 if (! exporter.is_null () || force) {
// Reallocate when there is no cached vector or its column count differs.
7392 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7393 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7394 exportMV_ = Y_rowMap;
7397 Y_rowMap = exportMV_;
// Remove empty processes from this matrix by delegating to the owned graph,
// then refresh the members that mirror the graph's state.
//
// Throws std::logic_error when myGraph_ is null, i.e. when the matrix was
// created with a constant graph and therefore may not modify it.
7403 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7408 TEUCHOS_TEST_FOR_EXCEPTION(
7409 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7410 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7411 "was created with a constant graph (that is, when it was created using "
7412 "the version of its constructor that takes an RCP<const CrsGraph>).  "
7413 "This is because the matrix is not allowed to modify the graph in that "
7414 "case, but removing empty processes requires modifying the graph.");
7415 myGraph_->removeEmptyProcessesInPlace (newMap);
// Re-read the row Map after the graph call and store it as this object's Map.
7419 this->map_ = this->getRowMap ();
// Keep the const graph handle pointing at the same (modified) graph.
7423 staticGraph_ = Teuchos::rcp_const_cast<
const Graph> (myGraph_);
// Build a new matrix C = alpha*A + beta*B, where B is this matrix.
//
// domainMap / rangeMap - optional domain and range Maps for C.  When null,
//                        B's Map is used if it has one, otherwise A's; if
//                        neither exists, std::invalid_argument is thrown.
// params               - optional parameter list.  Reads "Call fillComplete"
//                        (default true) and the sublists "Constructor
//                        parameters" and "fillComplete parameters".
//
// Throws std::invalid_argument when the supplied or inherited domain/range
// Maps are not the same (isSameAs) where both exist, or when the row Maps of
// A and B differ.  Throws std::logic_error if C was not created.
7426 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7427 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7432 const Teuchos::RCP<const map_type>& domainMap,
7433 const Teuchos::RCP<const map_type>& rangeMap,
7434 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7436 using Teuchos::Array;
7437 using Teuchos::ArrayView;
7438 using Teuchos::ParameterList;
7441 using Teuchos::rcp_implicit_cast;
7442 using Teuchos::sublist;
7446 using crs_matrix_type =
7448 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7452 std::unique_ptr<std::string> prefix;
7454 prefix = this->createPrefix(
"CrsMatrix",
"add");
7455 std::ostringstream os;
7456 os << *prefix <<
"Start" << endl;
7457 std::cerr << os.str ();
// B is an alias for this matrix, matching the C = alpha*A + beta*B notation.
7460 const crs_matrix_type& B = *
this;
7461 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7462 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7469 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7470 RCP<const map_type> B_domainMap = B.getDomainMap ();
7471 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7473 RCP<const map_type> theDomainMap = domainMap;
7474 RCP<const map_type> theRangeMap = rangeMap;
// Pick C's domain Map: the caller's, else B's, else A's.
7476 if (domainMap.is_null ()) {
7477 if (B_domainMap.is_null ()) {
7478 TEUCHOS_TEST_FOR_EXCEPTION(
7479 A_domainMap.is_null (), std::invalid_argument,
7480 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7481 "then you must supply a nonnull domain Map to this method.");
7482 theDomainMap = A_domainMap;
7484 theDomainMap = B_domainMap;
// Pick C's range Map by the same rule.
7487 if (rangeMap.is_null ()) {
7488 if (B_rangeMap.is_null ()) {
7489 TEUCHOS_TEST_FOR_EXCEPTION(
7490 A_rangeMap.is_null (), std::invalid_argument,
7491 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7492 "then you must supply a nonnull range Map to this method.");
7493 theRangeMap = A_rangeMap;
7495 theRangeMap = B_rangeMap;
// Compatibility checks between A's Maps, B's Maps, and any Maps the caller
// supplied; which checks apply depends on which Maps exist.
7503 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7504 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7505 TEUCHOS_TEST_FOR_EXCEPTION
7506 (! B_domainMap->isSameAs(*A_domainMap),
7507 std::invalid_argument,
7508 errPfx <<
"The input RowMatrix A must have a domain Map "
7509 "which is the same as (isSameAs) this RowMatrix's "
7511 TEUCHOS_TEST_FOR_EXCEPTION
7512 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7513 errPfx <<
"The input RowMatrix A must have a range Map "
7514 "which is the same as (isSameAs) this RowMatrix's range "
7516 TEUCHOS_TEST_FOR_EXCEPTION
7517 (! domainMap.is_null() &&
7518 ! domainMap->isSameAs(*B_domainMap),
7519 std::invalid_argument,
7520 errPfx <<
"The input domain Map must be the same as "
7521 "(isSameAs) this RowMatrix's domain Map.");
7522 TEUCHOS_TEST_FOR_EXCEPTION
7523 (! rangeMap.is_null() &&
7524 ! rangeMap->isSameAs(*B_rangeMap),
7525 std::invalid_argument,
7526 errPfx <<
"The input range Map must be the same as "
7527 "(isSameAs) this RowMatrix's range Map.");
7530 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7531 TEUCHOS_TEST_FOR_EXCEPTION
7532 (! domainMap.is_null() &&
7533 ! domainMap->isSameAs(*B_domainMap),
7534 std::invalid_argument,
7535 errPfx <<
"The input domain Map must be the same as "
7536 "(isSameAs) this RowMatrix's domain Map.");
7537 TEUCHOS_TEST_FOR_EXCEPTION
7538 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7539 std::invalid_argument,
7540 errPfx <<
"The input range Map must be the same as "
7541 "(isSameAs) this RowMatrix's range Map.");
7544 TEUCHOS_TEST_FOR_EXCEPTION
7545 (domainMap.is_null() || rangeMap.is_null(),
7546 std::invalid_argument, errPfx <<
"If neither A nor B "
7547 "have a domain and range Map, then you must supply a "
7548 "nonnull domain and range Map to this method.");
// Read options from the parameter list, if one was given.
7555 bool callFillComplete =
true;
7556 RCP<ParameterList> constructorSublist;
7557 RCP<ParameterList> fillCompleteSublist;
7558 if (! params.is_null()) {
7560 params->get(
"Call fillComplete", callFillComplete);
7561 constructorSublist = sublist(params,
"Constructor parameters");
7562 fillCompleteSublist = sublist(params,
"fillComplete parameters");
// C uses B's row Map.
7565 RCP<const map_type> A_rowMap = A.
getRowMap ();
7566 RCP<const map_type> B_rowMap = B.getRowMap ();
7567 RCP<const map_type> C_rowMap = B_rowMap;
7568 RCP<crs_matrix_type> C;
// With identical row Maps, bound each row of C by the sum of the entry
// counts of the corresponding rows of A (when alpha != 0) and B.
7574 if (A_rowMap->isSameAs (*B_rowMap)) {
7575 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7576 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7579 if (alpha != ZERO) {
7580 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7582 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7587 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7588 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7589 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7593 if (constructorSublist.is_null ()) {
7594 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7596 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7597 constructorSublist));
7608 TEUCHOS_TEST_FOR_EXCEPTION
7609 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7610 "be the same for statically allocated matrices, to ensure "
7611 "that there is sufficient space to do the addition.");
7614 TEUCHOS_TEST_FOR_EXCEPTION
7615 (C.is_null (), std::logic_error,
7616 errPfx <<
"C should not be null at this point.  "
7617 "Please report this bug to the Tpetra developers.");
7620 std::ostringstream os;
7621 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7622 std::cerr << os.str ();
7624 using gids_type = nonconst_global_inds_host_view_type;
7625 using vals_type = nonconst_values_host_view_type;
// Contribution of A: skipped entirely when alpha is zero.  The scratch
// arrays ind / val grow on demand to fit the longest row seen so far.
7629 if (alpha != ZERO) {
7630 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7631 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7633 const GO globalRow = A_rowMap->getGlobalElement (localRow);
7634 if (A_numEntries > static_cast<size_t> (ind.size ())) {
7635 Kokkos::resize(ind,A_numEntries);
7636 Kokkos::resize(val,A_numEntries);
7638 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7639 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7643 for (
size_t k = 0; k < A_numEntries; ++k) {
7644 valView[k] *= alpha;
7647 C->insertGlobalValues (globalRow, A_numEntries,
7648 reinterpret_cast<Scalar *>(valView.data()),
// Contribution of B (this matrix): copy each row and insert it into C.
7654 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7655 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7656 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7657 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7658 if (B_numEntries > static_cast<size_t> (ind.size ())) {
7659 Kokkos::resize(ind,B_numEntries);
7660 Kokkos::resize(val,B_numEntries);
7662 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7663 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7664 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7667 for (
size_t k = 0; k < B_numEntries; ++k) {
7671 C->insertGlobalValues (globalRow, B_numEntries,
7672 reinterpret_cast<Scalar *>(valView.data()),
// Optionally finish C with the chosen domain and range Maps.
7677 if (callFillComplete) {
7679 std::ostringstream os;
7680 os << *prefix <<
"Call fillComplete on C" << endl;
7681 std::cerr << os.str ();
7683 if (fillCompleteSublist.is_null ()) {
7684 C->fillComplete (theDomainMap, theRangeMap);
7686 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7690 std::ostringstream os;
7691 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7692 std::cerr << os.str ();
7696 std::ostringstream os;
7697 os << *prefix <<
"Done" << endl;
7698 std::cerr << os.str ();
7705 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7709 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7710 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7711 const Teuchos::RCP<const map_type>& domainMap,
7712 const Teuchos::RCP<const map_type>& rangeMap,
7713 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7720 using Teuchos::ArrayRCP;
7721 using Teuchos::ArrayView;
7722 using Teuchos::Comm;
7723 using Teuchos::ParameterList;
7726 typedef LocalOrdinal LO;
7727 typedef GlobalOrdinal GO;
7728 typedef node_type NT;
7733 const bool debug = Behavior::debug(
"CrsMatrix");
7734 const bool verbose = Behavior::verbose(
"CrsMatrix");
7735 int MyPID = getComm ()->getRank ();
7737 std::unique_ptr<std::string> verbosePrefix;
7740 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7741 std::ostringstream os;
7742 os <<
"Start" << endl;
7743 std::cerr << os.str();
7750 bool reverseMode =
false;
7751 bool restrictComm =
false;
7753 int mm_optimization_core_count =
7754 Behavior::TAFC_OptimizationCoreCount();
7755 RCP<ParameterList> matrixparams;
7756 bool overrideAllreduce =
false;
7757 bool useKokkosPath =
false;
7758 if (! params.is_null ()) {
7759 matrixparams = sublist (params,
"CrsMatrix");
7760 reverseMode = params->get (
"Reverse Mode", reverseMode);
7761 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7762 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7763 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7764 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7765 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7767 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7768 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7769 if(reverseMode) isMM =
false;
7773 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7775 int reduced_mismatch = 0;
7776 if (isMM && !overrideAllreduce) {
7779 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7780 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7781 rowTransfer.getRemoteLIDs ().size() == 0);
7782 mismatch = (source_vals != target_vals) ? 1 : 0;
7785 Teuchos::REDUCE_MAX, * (getComm ()));
7788 #ifdef HAVE_TPETRA_MMM_TIMINGS
7789 using Teuchos::TimeMonitor;
7791 if(!params.is_null())
7792 label = params->get(
"Timer Label",label);
7793 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7796 std::ostringstream os;
7797 if(isMM) os<<
":MMOpt";
7798 else os<<
":MMLegacy";
7802 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7810 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7811 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7812 TEUCHOS_TEST_FOR_EXCEPTION(
7813 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7814 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7815 "argument must be either an Import or an Export, and its template "
7816 "parameters must match the corresponding template parameters of the "
7824 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type> (domainTransfer);
7825 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type> (domainTransfer);
7827 if(! domainTransfer.is_null()) {
7828 TEUCHOS_TEST_FOR_EXCEPTION(
7829 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7830 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7831 "argument must be either an Import or an Export, and its template "
7832 "parameters must match the corresponding template parameters of the "
7835 TEUCHOS_TEST_FOR_EXCEPTION(
7836 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7837 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7838 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7839 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7840 "arguments must be of the same type (either Import or Export).");
7842 TEUCHOS_TEST_FOR_EXCEPTION(
7843 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7844 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7845 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7846 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7847 "arguments must be of the same type (either Import or Export).");
7853 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7857 RCP<const map_type> MyRowMap = reverseMode ?
7858 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7859 RCP<const map_type> MyColMap;
7860 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7861 domainMap : getDomainMap ();
7862 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7863 rangeMap : getRangeMap ();
7864 RCP<const map_type> BaseRowMap = MyRowMap;
7865 RCP<const map_type> BaseDomainMap = MyDomainMap;
7873 if (! destMat.is_null ()) {
7884 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7885 ! destMat->getGraph ()->isGloballyIndexed ();
7886 TEUCHOS_TEST_FOR_EXCEPTION(
7887 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7888 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7889 "to be nonnull, if its graph is empty (neither locally nor globally "
7898 TEUCHOS_TEST_FOR_EXCEPTION(
7899 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7900 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7901 "input argument 'destMat' is not the same as the (row) Map specified "
7902 "by the input argument 'rowTransfer'.");
7903 TEUCHOS_TEST_FOR_EXCEPTION(
7904 ! destMat->checkSizes (*
this), std::invalid_argument,
7905 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7906 "destination matrix, but checkSizes() indicates that it is not a legal "
7907 "legal target for redistribution from the source matrix (*this). This "
7908 "may mean that they do not have the same dimensions.");
7922 TEUCHOS_TEST_FOR_EXCEPTION(
7923 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7924 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7925 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7926 TEUCHOS_TEST_FOR_EXCEPTION(
7927 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7928 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7929 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7932 TEUCHOS_TEST_FOR_EXCEPTION(
7933 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7934 std::invalid_argument,
7935 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7936 "argument must be the same as the rebalanced domain map 'domainMap'");
7938 TEUCHOS_TEST_FOR_EXCEPTION(
7939 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7940 std::invalid_argument,
7941 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7942 "argument must be the same as the rebalanced domain map 'domainMap'");
7955 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7956 ArrayView<const LO> ExportLIDs = reverseMode ?
7957 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7958 auto RemoteLIDs = reverseMode ?
7959 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7960 auto PermuteToLIDs = reverseMode ?
7961 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7962 auto PermuteFromLIDs = reverseMode ?
7963 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7964 Distributor& Distor = rowTransfer.getDistributor ();
7967 Teuchos::Array<int> SourcePids;
7970 RCP<const map_type> ReducedRowMap, ReducedColMap,
7971 ReducedDomainMap, ReducedRangeMap;
7972 RCP<const Comm<int> > ReducedComm;
7976 if (destMat.is_null ()) {
7977 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7984 #ifdef HAVE_TPETRA_MMM_TIMINGS
7985 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7987 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7988 ReducedComm = ReducedRowMap.is_null () ?
7990 ReducedRowMap->getComm ();
7991 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7993 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7995 MyDomainMap->replaceCommWithSubset (ReducedComm);
7996 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7998 MyRangeMap->replaceCommWithSubset (ReducedComm);
8001 MyRowMap = ReducedRowMap;
8002 MyDomainMap = ReducedDomainMap;
8003 MyRangeMap = ReducedRangeMap;
8006 if (! ReducedComm.is_null ()) {
8007 MyPID = ReducedComm->getRank ();
8014 ReducedComm = MyRowMap->getComm ();
8023 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8026 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8028 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8029 #ifdef HAVE_TPETRA_MMM_TIMINGS
8030 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8038 Import_Util::getPids (*MyImporter, SourcePids,
false);
8040 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8043 #ifdef HAVE_TPETRA_MMM_TIMINGS
8044 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8046 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8047 IntVectorType SourceCol_pids(getColMap());
8049 SourceDomain_pids.putScalar(MyPID);
8051 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8052 SourcePids.resize (getColMap ()->getLocalNumElements ());
8053 SourceCol_pids.get1dCopy (SourcePids ());
8055 else if (MyImporter.is_null ()) {
8057 #ifdef HAVE_TPETRA_MMM_TIMINGS
8058 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8060 SourcePids.resize (getColMap ()->getLocalNumElements ());
8061 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8063 else if ( ! MyImporter.is_null () &&
8064 ! domainTransfer.is_null () ) {
8069 #ifdef HAVE_TPETRA_MMM_TIMINGS
8070 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8074 IntVectorType TargetDomain_pids (domainMap);
8075 TargetDomain_pids.putScalar (MyPID);
8078 IntVectorType SourceDomain_pids (getDomainMap ());
8081 IntVectorType SourceCol_pids (getColMap ());
8083 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8084 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8086 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8087 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8089 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8090 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8092 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8093 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8096 TEUCHOS_TEST_FOR_EXCEPTION(
8097 true, std::logic_error,
"Tpetra::CrsMatrix::"
8098 "transferAndFillComplete: Should never get here! "
8099 "Please report this bug to a Tpetra developer.");
8101 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8102 SourcePids.resize (getColMap ()->getLocalNumElements ());
8103 SourceCol_pids.get1dCopy (SourcePids ());
8105 else if ( ! MyImporter.is_null () &&
8106 BaseDomainMap->isSameAs (*BaseRowMap) &&
8107 getDomainMap ()->isSameAs (*getRowMap ())) {
8109 #ifdef HAVE_TPETRA_MMM_TIMINGS
8110 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8113 IntVectorType TargetRow_pids (domainMap);
8114 IntVectorType SourceRow_pids (getRowMap ());
8115 IntVectorType SourceCol_pids (getColMap ());
8117 TargetRow_pids.putScalar (MyPID);
8118 if (! reverseMode && xferAsImport !=
nullptr) {
8119 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8121 else if (reverseMode && xferAsExport !=
nullptr) {
8122 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8124 else if (! reverseMode && xferAsExport !=
nullptr) {
8125 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8127 else if (reverseMode && xferAsImport !=
nullptr) {
8128 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8131 TEUCHOS_TEST_FOR_EXCEPTION(
8132 true, std::logic_error,
"Tpetra::CrsMatrix::"
8133 "transferAndFillComplete: Should never get here! "
8134 "Please report this bug to a Tpetra developer.");
8137 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8138 SourcePids.resize (getColMap ()->getLocalNumElements ());
8139 SourceCol_pids.get1dCopy (SourcePids ());
8142 TEUCHOS_TEST_FOR_EXCEPTION(
8143 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8144 "transferAndFillComplete: This method only allows either domainMap == "
8145 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8146 "getDomainMap () == getRowMap ()).");
8150 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8152 #ifdef HAVE_TPETRA_MMM_TIMINGS
8153 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8155 if (constantNumPackets == 0) {
8156 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8157 RemoteLIDs.view_host().size ());
8164 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8165 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8171 #ifdef HAVE_TPETRA_MMM_TIMINGS
8172 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8175 using Teuchos::outArg;
8176 using Teuchos::REDUCE_MAX;
8177 using Teuchos::reduceAll;
8180 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8181 const int myRank = comm->getRank ();
8183 std::ostringstream errStrm;
8187 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8190 destMat->numExportPacketsPerLID_.modify_host ();
8191 numExportPacketsPerLID =
8194 catch (std::exception& e) {
8195 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8196 << e.what () << std::endl;
8200 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8201 "an exception not a subclass of std::exception" << std::endl;
8205 if (! comm.is_null ()) {
8206 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8209 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8210 TEUCHOS_TEST_FOR_EXCEPTION(
8211 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8212 "exception on at least one process.");
8216 std::ostringstream os;
8217 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8219 std::cerr << os.str ();
8224 numExportPacketsPerLID,
8227 constantNumPackets);
8229 catch (std::exception& e) {
8230 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8231 << e.what () << std::endl;
8235 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8236 "an exception not a subclass of std::exception" << std::endl;
8241 std::ostringstream os;
8242 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8244 std::cerr << os.str ();
8247 if (! comm.is_null ()) {
8248 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8251 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8252 TEUCHOS_TEST_FOR_EXCEPTION(
8253 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8254 "exception on at least one process.");
8259 destMat->numExportPacketsPerLID_.modify_host ();
8260 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8263 std::ostringstream os;
8264 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8266 std::cerr << os.str ();
8270 numExportPacketsPerLID,
8273 constantNumPackets);
8275 std::ostringstream os;
8276 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8278 std::cerr << os.str ();
8285 #ifdef HAVE_TPETRA_MMM_TIMINGS
8286 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8288 if (! communication_needed) {
8290 std::ostringstream os;
8291 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8292 std::cerr << os.str ();
8297 if (constantNumPackets == 0) {
8299 std::ostringstream os;
8300 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8302 std::cerr << os.str ();
8307 destMat->numExportPacketsPerLID_.sync_host ();
8308 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8310 destMat->numImportPacketsPerLID_.sync_host ();
8311 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8315 std::ostringstream os;
8316 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8318 std::cerr << os.str ();
8320 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8321 destMat->numImportPacketsPerLID_.view_host());
8323 std::ostringstream os;
8324 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8326 std::cerr << os.str ();
8329 size_t totalImportPackets = 0;
8330 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8331 totalImportPackets += numImportPacketsPerLID[i];
8336 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8337 verbosePrefix.get ());
8338 destMat->imports_.modify_host ();
8339 auto hostImports = destMat->imports_.view_host();
8342 destMat->exports_.sync_host ();
8343 auto hostExports = destMat->exports_.view_host();
8345 std::ostringstream os;
8346 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8348 std::cerr << os.str ();
8350 Distor.doReversePostsAndWaits (hostExports,
8351 numExportPacketsPerLID,
8353 numImportPacketsPerLID);
8355 std::ostringstream os;
8356 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8358 std::cerr << os.str ();
8363 std::ostringstream os;
8364 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8366 std::cerr << os.str ();
8368 destMat->imports_.modify_host ();
8369 auto hostImports = destMat->imports_.view_host();
8372 destMat->exports_.sync_host ();
8373 auto hostExports = destMat->exports_.view_host();
8375 std::ostringstream os;
8376 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8378 std::cerr << os.str ();
8380 Distor.doReversePostsAndWaits (hostExports,
8384 std::ostringstream os;
8385 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8387 std::cerr << os.str ();
8392 if (constantNumPackets == 0) {
8394 std::ostringstream os;
8395 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8397 std::cerr << os.str ();
8402 destMat->numExportPacketsPerLID_.sync_host ();
8403 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8405 destMat->numImportPacketsPerLID_.sync_host ();
8406 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8409 std::ostringstream os;
8410 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8412 std::cerr << os.str ();
8414 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8415 destMat->numImportPacketsPerLID_.view_host());
8417 std::ostringstream os;
8418 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8420 std::cerr << os.str ();
8423 size_t totalImportPackets = 0;
8424 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8425 totalImportPackets += numImportPacketsPerLID[i];
8430 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8431 verbosePrefix.get ());
8432 destMat->imports_.modify_host ();
8433 auto hostImports = destMat->imports_.view_host();
8436 destMat->exports_.sync_host ();
8437 auto hostExports = destMat->exports_.view_host();
8439 std::ostringstream os;
8440 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8442 std::cerr << os.str ();
8444 Distor.doPostsAndWaits (hostExports,
8445 numExportPacketsPerLID,
8447 numImportPacketsPerLID);
8449 std::ostringstream os;
8450 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8452 std::cerr << os.str ();
8457 std::ostringstream os;
8458 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8460 std::cerr << os.str ();
8462 destMat->imports_.modify_host ();
8463 auto hostImports = destMat->imports_.view_host();
8466 destMat->exports_.sync_host ();
8467 auto hostExports = destMat->exports_.view_host();
8469 std::ostringstream os;
8470 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8472 std::cerr << os.str ();
8474 Distor.doPostsAndWaits (hostExports,
8478 std::ostringstream os;
8479 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8481 std::cerr << os.str ();
8492 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8494 Teuchos::Array<int> RemotePids;
8496 Teuchos::Array<int> TargetPids;
8502 destMat->numImportPacketsPerLID_.modify_host();
8504 # ifdef HAVE_TPETRA_MMM_TIMINGS
8505 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8507 ArrayRCP<size_t> CSR_rowptr;
8508 ArrayRCP<GO> CSR_colind_GID;
8509 ArrayRCP<LO> CSR_colind_LID;
8510 ArrayRCP<Scalar> CSR_vals;
8512 destMat->imports_.sync_device ();
8513 destMat->numImportPacketsPerLID_.sync_device ();
8515 size_t N = BaseRowMap->getLocalNumElements ();
8517 auto RemoteLIDs_d = RemoteLIDs.view_device();
8518 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8519 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8524 destMat->imports_.view_device(),
8525 destMat->numImportPacketsPerLID_.view_device(),
8539 if (
typeid (LO) ==
typeid (GO)) {
8540 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8543 CSR_colind_LID.resize (CSR_colind_GID.size());
8545 CSR_colind_LID.resize (CSR_colind_GID.size());
8550 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8552 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8554 #ifdef HAVE_TPETRA_MMM_TIMINGS
8555 tmCopySPRdata = Teuchos::null;
8564 std::ostringstream os;
8565 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8567 std::cerr << os.str ();
8570 #ifdef HAVE_TPETRA_MMM_TIMINGS
8571 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8573 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8583 std::ostringstream os;
8584 os << *verbosePrefix <<
"restrictComm="
8585 << (restrictComm ?
"true" :
"false") << std::endl;
8586 std::cerr << os.str ();
8593 #ifdef HAVE_TPETRA_MMM_TIMINGS
8594 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8597 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8599 MyColMap->replaceCommWithSubset (ReducedComm);
8600 MyColMap = ReducedColMap;
8605 std::ostringstream os;
8606 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8607 std::cerr << os.str ();
8609 destMat->replaceColMap (MyColMap);
8616 if (ReducedComm.is_null ()) {
8618 std::ostringstream os;
8619 os << *verbosePrefix <<
"I am no longer in the communicator; "
8620 "returning" << std::endl;
8621 std::cerr << os.str ();
8630 if ((! reverseMode && xferAsImport !=
nullptr) ||
8631 (reverseMode && xferAsExport !=
nullptr)) {
8633 std::ostringstream os;
8634 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8635 std::cerr << os.str ();
8637 #ifdef HAVE_TPETRA_MMM_TIMINGS
8638 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8640 Import_Util::sortCrsEntries (CSR_rowptr(),
8644 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8645 (reverseMode && xferAsImport !=
nullptr)) {
8647 std::ostringstream os;
8648 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8650 std::cerr << os.str();
8652 #ifdef HAVE_TPETRA_MMM_TIMINGS
8653 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8655 Import_Util::sortAndMergeCrsEntries (CSR_rowptr(),
8658 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8659 CSR_colind_LID.resize (CSR_rowptr[N]);
8660 CSR_vals.resize (CSR_rowptr[N]);
8664 TEUCHOS_TEST_FOR_EXCEPTION(
8665 true, std::logic_error,
"Tpetra::CrsMatrix::"
8666 "transferAndFillComplete: Should never get here! "
8667 "Please report this bug to a Tpetra developer.");
8674 std::ostringstream os;
8675 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8676 std::cerr << os.str ();
8685 #ifdef HAVE_TPETRA_MMM_TIMINGS
8686 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8688 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8700 destMat->numImportPacketsPerLID_.modify_host();
8702 # ifdef HAVE_TPETRA_MMM_TIMINGS
8703 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8705 ArrayRCP<size_t> CSR_rowptr;
8706 ArrayRCP<GO> CSR_colind_GID;
8707 ArrayRCP<LO> CSR_colind_LID;
8708 ArrayRCP<Scalar> CSR_vals;
8710 destMat->imports_.sync_device ();
8711 destMat->numImportPacketsPerLID_.sync_device ();
8713 size_t N = BaseRowMap->getLocalNumElements ();
8715 auto RemoteLIDs_d = RemoteLIDs.view_device();
8716 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8717 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8719 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8720 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8721 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8722 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8723 Kokkos::View<int*,device_type> TargetPids_d;
8728 destMat->imports_.view_device(),
8729 destMat->numImportPacketsPerLID_.view_device(),
8741 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8743 #ifdef HAVE_TPETRA_MMM_TIMINGS
8744 tmCopySPRdata = Teuchos::null;
8753 std::ostringstream os;
8754 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8756 std::cerr << os.str ();
8759 #ifdef HAVE_TPETRA_MMM_TIMINGS
8760 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8762 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8772 std::ostringstream os;
8773 os << *verbosePrefix <<
"restrictComm="
8774 << (restrictComm ?
"true" :
"false") << std::endl;
8775 std::cerr << os.str ();
8782 #ifdef HAVE_TPETRA_MMM_TIMINGS
8783 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8786 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8788 MyColMap->replaceCommWithSubset (ReducedComm);
8789 MyColMap = ReducedColMap;
8794 std::ostringstream os;
8795 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8796 std::cerr << os.str ();
8798 destMat->replaceColMap (MyColMap);
8805 if (ReducedComm.is_null ()) {
8807 std::ostringstream os;
8808 os << *verbosePrefix <<
"I am no longer in the communicator; "
8809 "returning" << std::endl;
8810 std::cerr << os.str ();
8820 if ((! reverseMode && xferAsImport !=
nullptr) ||
8821 (reverseMode && xferAsExport !=
nullptr)) {
8823 std::ostringstream os;
8824 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8825 std::cerr << os.str ();
8827 #ifdef HAVE_TPETRA_MMM_TIMINGS
8828 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8830 Import_Util::sortCrsEntries (CSR_rowptr_d,
8834 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8835 (reverseMode && xferAsImport !=
nullptr)) {
8837 std::ostringstream os;
8838 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8840 std::cerr << os.str();
8842 #ifdef HAVE_TPETRA_MMM_TIMINGS
8843 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8845 Import_Util::sortAndMergeCrsEntries (CSR_rowptr_d,
8850 TEUCHOS_TEST_FOR_EXCEPTION(
8851 true, std::logic_error,
"Tpetra::CrsMatrix::"
8852 "transferAndFillComplete: Should never get here! "
8853 "Please report this bug to a Tpetra developer.");
8861 std::ostringstream os;
8862 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8863 std::cerr << os.str ();
8867 #ifdef HAVE_TPETRA_MMM_TIMINGS
8868 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8870 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8878 #ifdef HAVE_TPETRA_MMM_TIMINGS
8879 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8882 Teuchos::ParameterList esfc_params;
8884 RCP<import_type> MyImport;
8887 if (iallreduceRequest.get () !=
nullptr) {
8889 std::ostringstream os;
8890 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8892 std::cerr << os.str ();
8894 iallreduceRequest->wait ();
8895 if (reduced_mismatch != 0) {
8901 #ifdef HAVE_TPETRA_MMM_TIMINGS
8902 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8907 std::ostringstream os;
8908 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8909 std::cerr << os.str ();
8912 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8913 Teuchos::ArrayRCP<int> type3PIDs;
8914 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8915 auto colind = getCrsGraph()->getLocalIndicesHost();
8918 std::ostringstream os;
8919 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8920 std::cerr << os.str ();
8924 #ifdef HAVE_TPETRA_MMM_TIMINGS
8925 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8927 Import_Util::reverseNeighborDiscovery(*
this,
8939 std::ostringstream os;
8940 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8941 std::cerr << os.str ();
8944 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8945 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8947 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8948 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8950 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8952 std::vector<bool> IsOwned(numCols,
true);
8953 std::vector<int> SentTo(numCols,-1);
8954 if (! MyImporter.is_null ()) {
8955 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8956 IsOwned[rlid]=
false;
8960 std::vector<std::pair<int,GO> > usrtg;
8961 usrtg.reserve(TEPID2.size());
8964 const auto& colMap = * (this->getColMap ());
8966 const LO row = TELID2[i];
8967 const int pid = TEPID2[i];
8968 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8969 const int col = colind[j];
8970 if (IsOwned[col] && SentTo[col] != pid) {
8972 GO gid = colMap.getGlobalElement (col);
8973 usrtg.push_back (std::pair<int,GO> (pid, gid));
8981 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8983 usrtg.erase(eopg,usrtg.end());
8986 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8987 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8990 for(
auto && p : usrtg) {
8991 EPID2[pos]= p.first;
8992 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8996 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8997 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8998 GO InfGID = std::numeric_limits<GO>::max();
8999 int InfPID = INT_MAX;
9002 #endif // TPETRA_MIN3
9003 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9004 int i1=0, i2=0, i3=0;
9005 int Len1 = EPID1.size();
9006 int Len2 = EPID2.size();
9007 int Len3 = EPID3.size();
9009 int MyLen=Len1+Len2+Len3;
9010 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
9011 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
9014 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9015 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9016 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9017 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9019 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9020 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9021 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9023 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9024 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9027 #endif // TPETRA_MIN3
9028 bool added_entry=
false;
9030 if(PID1 == MIN_PID && GID1 == MIN_GID){
9031 userExportLIDs[iloc]=ELID1[i1];
9032 userExportPIDs[iloc]=EPID1[i1];
9037 if(PID2 == MIN_PID && GID2 == MIN_GID){
9039 userExportLIDs[iloc]=ELID2[i2];
9040 userExportPIDs[iloc]=EPID2[i2];
9046 if(PID3 == MIN_PID && GID3 == MIN_GID){
9048 userExportLIDs[iloc]=ELID3[i3];
9049 userExportPIDs[iloc]=EPID3[i3];
9057 std::ostringstream os;
9058 os << *verbosePrefix <<
"Create Import" << std::endl;
9059 std::cerr << os.str ();
9062 #ifdef HAVE_TPETRA_MMM_TIMINGS
9063 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9065 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9067 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9068 MyImport = rcp (
new import_type (MyDomainMap,
9071 userExportLIDs.view(0,iloc).getConst(),
9072 userExportPIDs.view(0,iloc).getConst(),
9077 std::ostringstream os;
9078 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9079 std::cerr << os.str ();
9083 #ifdef HAVE_TPETRA_MMM_TIMINGS
9084 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9085 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9087 if(!params.is_null())
9088 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9089 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9095 #ifdef HAVE_TPETRA_MMM_TIMINGS
9096 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9099 std::ostringstream os;
9100 os << *verbosePrefix <<
"Create Import" << std::endl;
9101 std::cerr << os.str ();
9104 #ifdef HAVE_TPETRA_MMM_TIMINGS
9105 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9107 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9108 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9109 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9110 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9113 std::ostringstream os;
9114 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9115 std::cerr << os.str ();
9118 #ifdef HAVE_TPETRA_MMM_TIMINGS
9119 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9120 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9122 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9125 if (!params.is_null ()) {
9126 esfc_params.set (
"compute global constants",
9127 params->get (
"compute global constants",
true));
9129 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9130 MyImport, Teuchos::null,
9131 rcp (
new Teuchos::ParameterList (esfc_params)));
9134 #ifdef HAVE_TPETRA_MMM_TIMINGS
9135 tmIESFC = Teuchos::null;
9139 std::ostringstream os;
9140 os << *verbosePrefix <<
"Done" << endl;
9141 std::cerr << os.str ();
// importAndFillComplete, single-Import overload.
// NOTE(review): the listing elides the lines that carry the return type, the
// qualified function name, the destMatrix/importer parameters and the braces;
// only the trailing parameters and the body statement are visible here.
//
// The body is a pure forwarder: it hands every argument to
// transferAndFillComplete and passes Teuchos::null as the third argument,
// i.e. no separate domain transfer object is supplied by this overload.
9146 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9151 const Teuchos::RCP<const map_type>& domainMap,
9152 const Teuchos::RCP<const map_type>& rangeMap,
9153 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9155 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// importAndFillComplete, two-Import overload (row Import plus domain Import).
// NOTE(review): the listing elides the lines that carry the return type, the
// qualified function name, the destMatrix/rowImporter/domainImporter
// parameters and the braces.
//
// Forwards to transferAndFillComplete. domainImporter is wrapped with
// Teuchos::rcpFromRef before being passed as the third argument; per the
// Teuchos documentation that yields a non-owning RCP, so the referenced
// Import must stay alive for as long as the callee uses it.
9158 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9164 const Teuchos::RCP<const map_type>& domainMap,
9165 const Teuchos::RCP<const map_type>& rangeMap,
9166 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9168 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// exportAndFillComplete, single-Export overload.
// NOTE(review): the listing elides the lines that carry the return type, the
// qualified function name, the destMatrix/exporter parameters and the braces.
//
// Pure forwarder: passes the exporter as the row transfer to
// transferAndFillComplete and Teuchos::null as the third argument, i.e. no
// separate domain transfer object is supplied by this overload.
9171 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9176 const Teuchos::RCP<const map_type>& domainMap,
9177 const Teuchos::RCP<const map_type>& rangeMap,
9178 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9180 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// exportAndFillComplete, two-Export overload (row Export plus domain Export).
// NOTE(review): the listing elides the lines that carry the return type, the
// qualified function name, the destMatrix/rowExporter/domainExporter
// parameters and the braces.
//
// Forwards to transferAndFillComplete. domainExporter is wrapped with
// Teuchos::rcpFromRef before being passed as the third argument; per the
// Teuchos documentation that yields a non-owning RCP, so the referenced
// Export must stay alive for as long as the callee uses it.
9183 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9189 const Teuchos::RCP<const map_type>& domainMap,
9190 const Teuchos::RCP<const map_type>& rangeMap,
9191 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9193 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Explicit-instantiation helper: expands to an explicit instantiation of the
// whole CrsMatrix class template for one (SCALAR, LO, GO, NODE) combination.
// Comments must stay above the #define; a // comment inside the
// backslash-continued body would swallow the following line.
9204 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9206 template class CrsMatrix< SCALAR , LO , GO , NODE >;
// Explicit-instantiation helper for the member template convert<>():
// instantiates CrsMatrix<SI, LO, GO, NODE>::convert<SO>() const, whose result
// is an RCP to a CrsMatrix<SO, LO, GO, NODE>. SI is the scalar type of the
// matrix being converted, SO the scalar type of the returned matrix.
9208 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9210 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9211 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
// Instantiation helper for the nonmember importAndFillCompleteCrsMatrix that
// takes a single Import (`importer`). The Import and Map template arguments
// are spelled through CrsMatrix<SCALAR, LO, GO, NODE>'s own
// local_ordinal_type / global_ordinal_type / node_type typedefs.
// NOTE(review): the line between the #define and the return type is elided in
// this listing — presumably the `template` keyword that makes this an explicit
// instantiation; confirm against the original file.
9213 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9215 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9216 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9217 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9218 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9219 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9220 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9221 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9222 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9223 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9224 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9225 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9226 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Instantiation helper for the nonmember importAndFillCompleteCrsMatrix
// overload that takes two Import objects (`rowImporter` and `domainImporter`)
// in addition to the domain Map, range Map and parameter list.
// NOTE(review): the line between the #define and the return type is elided in
// this listing — presumably the `template` keyword that makes this an explicit
// instantiation; confirm against the original file.
9228 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9230 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9231 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9232 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9233 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9234 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9235 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9236 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9237 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9238 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9239 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9240 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9241 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9242 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9243 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9244 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Instantiation helper for the nonmember exportAndFillCompleteCrsMatrix that
// takes a single Export (`exporter`). The Export and Map template arguments
// are spelled through CrsMatrix<SCALAR, LO, GO, NODE>'s own
// local_ordinal_type / global_ordinal_type / node_type typedefs.
// NOTE(review): the line between the #define and the return type is elided in
// this listing — presumably the `template` keyword that makes this an explicit
// instantiation; confirm against the original file.
9247 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9249 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9250 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9251 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9252 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9253 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9254 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9255 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9256 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9257 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9258 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9259 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9260 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Instantiation helper for the nonmember exportAndFillCompleteCrsMatrix
// overload that takes two Export objects (`rowExporter` and `domainExporter`)
// in addition to the domain Map, range Map and parameter list.
// NOTE(review): the line between the #define and the return type is elided in
// this listing — presumably the `template` keyword that makes this an explicit
// instantiation; confirm against the original file.
9262 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9264 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9265 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9266 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9267 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9268 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9269 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9270 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9271 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9272 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9273 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9274 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9275 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9276 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9277 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9278 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella instantiation macro for one (SCALAR, LO, GO, NODE) combination:
// expands, in order, to the class instantiation macro and the four
// import/export-and-fill-complete macros (single- and two-transfer variants).
// It does not expand TPETRA_CRSMATRIX_CONVERT_INSTANT, which takes a different
// argument list (two scalar types) and so has to be invoked separately.
9281 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9282 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9283 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9284 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9285 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9286 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9288 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror num_row_entries_type
Row offsets for "1-D" storage.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrixDevice() that supports local matrix-vect...
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.