10 #ifndef TPETRA_CRSMATRIX_DEF_HPP
11 #define TPETRA_CRSMATRIX_DEF_HPP
23 #include "Tpetra_RowMatrix.hpp"
24 #include "Tpetra_LocalCrsMatrixOperator.hpp"
25 #include "Tpetra_computeRowAndColumnOneNorms.hpp"
34 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
42 #include "Tpetra_Details_packCrsMatrix.hpp"
43 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
45 #include "Teuchos_FancyOStream.hpp"
46 #include "Teuchos_RCP.hpp"
47 #include "Teuchos_DataAccess.hpp"
48 #include "Teuchos_SerialDenseMatrix.hpp"
49 #include "KokkosBlas1_scal.hpp"
50 #include "KokkosSparse_getDiagCopy.hpp"
51 #include "KokkosSparse_spmv.hpp"
// Applies a binary function to the value at dest using a compare-and-swap
// retry loop: newVal = f(assume, inputVal) is computed from the assumed
// current value, Kokkos::atomic_compare_exchange attempts to install it, and
// the loop repeats while the value returned by the exchange (oldVal) differs
// from the assumed one.
// NOTE(review): the remaining parameters, the loop head, and the return
// statement are not visible in this excerpt.
64 template<
class T,
class BinaryFunction>
65 T atomic_binary_function_update (T*
const dest,
79 T newVal = f (assume, inputVal);
80 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
81 }
while (assume != oldVal);
// Returns the larger of the magnitudes of x and y, where magnitude is taken
// from Teuchos::ScalarTraits<Scalar>.
// NOTE(review): the enclosing declaration (name and signature) is not visible
// in this excerpt.
101 template<
class Scalar>
105 typedef Teuchos::ScalarTraits<Scalar> STS;
106 return std::max (STS::magnitude (x), STS::magnitude (y));
// Constructor taking a row Map, maxNumEntriesPerRow, and an optional
// parameter list. It creates a new crs_graph_type from (rowMap,
// maxNumEntriesPerRow, ...) and reports a std::exception thrown by the graph
// constructor as std::runtime_error, prefixed with this constructor's
// signature (tfecfFuncName).
115 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
116 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
117 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
118 size_t maxNumEntriesPerRow,
119 const Teuchos::RCP<Teuchos::ParameterList>& params) :
122 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
123 "[, RCP<ParameterList>]): ";
124 Teuchos::RCP<crs_graph_type> graph;
126 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
129 catch (std::exception& e) {
130 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
131 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
132 "size_t [, RCP<ParameterList>]) threw an exception: "
// staticGraph_ is made to refer to the same graph as myGraph_.
139 staticGraph_ = myGraph_;
// Constructor taking (per its tfecfFuncName) a row Map, an ArrayView of
// per-row entry counts (numEntPerRowToAlloc), and an optional parameter list.
// A std::exception thrown while building the graph is reported as
// std::runtime_error; on success the new graph is stored in staticGraph_.
// NOTE(review): the first parameter and the graph construction call are not
// visible in this excerpt.
144 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
147 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
148 const Teuchos::RCP<Teuchos::ParameterList>& params) :
151 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
152 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
153 Teuchos::RCP<crs_graph_type> graph;
159 catch (std::exception& e) {
160 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
161 (
true, std::runtime_error,
"CrsGraph constructor "
162 "(RCP<const Map>, ArrayView<const size_t>"
163 "[, RCP<ParameterList>]) threw an exception: "
170 staticGraph_ = graph;
// Constructor taking (per its tfecfFuncName) a row Map, a column Map,
// maxNumEntPerRow, and an optional parameter list.
// It first checks that staticGraph_ and myGraph_ are both null (otherwise
// std::logic_error), then builds a graph; a std::exception thrown by the
// graph constructor is reported as std::runtime_error including e.what().
175 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
178 const Teuchos::RCP<const map_type>& colMap,
179 const size_t maxNumEntPerRow,
180 const Teuchos::RCP<Teuchos::ParameterList>& params) :
183 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
184 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
185 const char suffix[] =
186 " Please report this bug to the Tpetra developers.";
// Sanity checks: neither graph pointer may be set on entry.
189 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
190 (! staticGraph_.is_null (), std::logic_error,
191 "staticGraph_ is not null at the beginning of the constructor."
193 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
194 (! myGraph_.is_null (), std::logic_error,
195 "myGraph_ is not null at the beginning of the constructor."
197 Teuchos::RCP<crs_graph_type> graph;
203 catch (std::exception& e) {
204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
205 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
206 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
207 "exception: " << e.what ());
// staticGraph_ is made to refer to the same graph as myGraph_.
213 staticGraph_ = myGraph_;
// Constructor taking (per its tfecfFuncName) a row Map, a column Map, an
// ArrayView of per-row entry counts (numEntPerRowToAlloc), and an optional
// parameter list. A std::exception thrown while building the graph is
// reported as std::runtime_error including e.what(); on success the new
// graph is stored in staticGraph_.
218 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
221 const Teuchos::RCP<const map_type>& colMap,
222 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
223 const Teuchos::RCP<Teuchos::ParameterList>& params) :
226 const char tfecfFuncName[] =
227 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
228 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
229 Teuchos::RCP<crs_graph_type> graph;
235 catch (std::exception& e) {
236 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
237 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
238 "RCP<const Map>, ArrayView<const size_t>[, "
239 "RCP<ParameterList>]) threw an exception: " << e.what ());
245 staticGraph_ = graph;
// Constructor taking an existing const graph. The parameter list argument is
// unnamed and therefore unused here. staticGraph_ is set to the input graph
// and storageStatus_ to Details::STORAGE_1D_PACKED.
//
// Throws std::runtime_error if graph is null or not fill complete.
// Allocates a values_type labeled "Tpetra::CrsMatrix::values" with one entry
// per entry of graph->lclIndsPacked_wdv, and makes valuesUnpacked_wdv share
// valuesPacked_wdv. Progress messages ("Start", "Allocate values", "Done")
// are written to std::cerr.
// NOTE(review): the condition guarding the diagnostic output is not visible
// in this excerpt.
251 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
253 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
254 const Teuchos::RCP<Teuchos::ParameterList>& ) :
256 staticGraph_ (graph),
257 storageStatus_ (Details::STORAGE_1D_PACKED)
260 typedef typename local_matrix_device_type::values_type values_type;
261 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
262 "RCP<ParameterList>]): ";
265 std::unique_ptr<std::string> prefix;
267 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
268 std::ostringstream os;
269 os << *prefix <<
"Start" << endl;
270 std::cerr << os.str ();
// Validate the input graph before touching it.
273 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
274 (graph.is_null (), std::runtime_error,
"Input graph is null.");
275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
276 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
277 "is not fill complete. You must call fillComplete on the "
278 "graph before using it to construct a CrsMatrix. Note that "
279 "calling resumeFill on the graph makes it not fill complete, "
280 "even if you had previously called fillComplete. In that "
281 "case, you must call fillComplete on the graph again.");
// The number of values equals the number of packed local column indices.
289 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
291 std::ostringstream os;
292 os << *prefix <<
"Allocate values: " << numEnt << endl;
293 std::cerr << os.str ();
296 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
// Packed and unpacked values refer to the same storage.
298 valuesUnpacked_wdv = valuesPacked_wdv;
303 std::ostringstream os;
304 os << *prefix <<
"Done" << endl;
305 std::cerr << os.str ();
// Constructor that builds a matrix on a given const graph while taking its
// storage status and its value storage from an existing matrix ("matrix").
//
// staticGraph_ is set to the input graph and storageStatus_ is copied from
// matrix. Throws std::runtime_error if graph is null or not fill complete.
// valuesPacked_wdv / valuesUnpacked_wdv are constructed from the
// corresponding members of matrix with offset 0 and a length equal to the
// extent of the graph's packed / unpacked local index arrays.
//
// The function-name prefix used in error messages names this overload
// (matrix, graph[, params]); it previously repeated the signature of the
// (graph, values[, params]) constructor, which made errors point at the
// wrong overload.
// NOTE(review): the line declaring the "matrix" parameter is not visible in
// this excerpt; it is assumed to be a const CrsMatrix& because its
// storageStatus_ and values*_wdv members are read below.
309 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
312 const Teuchos::RCP<const crs_graph_type>& graph,
313 const Teuchos::RCP<Teuchos::ParameterList>& params) :
315 staticGraph_ (graph),
316 storageStatus_ (matrix.storageStatus_)
318 const char tfecfFuncName[] =
"CrsMatrix(const CrsMatrix&, "
319 "RCP<const CrsGraph>"
320 "[, RCP<ParameterList>]): ";
// Validate the input graph before touching it.
321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
322 (graph.is_null (), std::runtime_error,
"Input graph is null.");
323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
324 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
325 "is not fill complete. You must call fillComplete on the "
326 "graph before using it to construct a CrsMatrix. Note that "
327 "calling resumeFill on the graph makes it not fill complete, "
328 "even if you had previously called fillComplete. In that "
329 "case, you must call fillComplete on the graph again.");
// Size the packed values by the graph's packed local indices.
331 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
332 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
// Size the unpacked values by the graph's unpacked local indices.
334 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
335 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
// Constructor taking an existing const graph and a values view. The
// parameter list argument is unnamed and therefore unused here.
// staticGraph_ is set to the input graph and storageStatus_ to
// Details::STORAGE_1D_PACKED.
//
// Throws std::runtime_error if graph is null or not fill complete.
// At the end, valuesUnpacked_wdv is made to share valuesPacked_wdv.
// NOTE(review): the statement that stores "values" is not visible in this
// excerpt.
341 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
343 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
344 const typename local_matrix_device_type::values_type& values,
345 const Teuchos::RCP<Teuchos::ParameterList>& ) :
347 staticGraph_ (graph),
348 storageStatus_ (Details::STORAGE_1D_PACKED)
350 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
351 "local_matrix_device_type::values_type, "
352 "[,RCP<ParameterList>]): ";
// Validate the input graph before touching it.
353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
354 (graph.is_null (), std::runtime_error,
"Input graph is null.");
355 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
356 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
357 "is not fill complete. You must call fillComplete on the "
358 "graph before using it to construct a CrsMatrix. Note that "
359 "calling resumeFill on the graph makes it not fill complete, "
360 "even if you had previously called fillComplete. In that "
361 "case, you must call fillComplete on the graph again.");
// Packed and unpacked values refer to the same storage.
370 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor taking (per its tfecfFuncName) a row Map, a column Map, and
// the three CRS arrays as Kokkos views: rowPointers, columnIndices, values.
// storageStatus_ is set to Details::STORAGE_1D_PACKED.
//
// Checks performed:
//  - std::invalid_argument if values and columnIndices differ in length;
//  - when debug is true and rowPointers is nonempty, std::invalid_argument
//    if the last entry of rowPointers (read on host via getEntryOnHost)
//    does not equal the length of columnIndices and values;
//  - std::runtime_error if the CrsGraph constructor throws a std::exception;
//  - std::logic_error if the resulting local graph's row_map / entries
//    extents do not match the inputs.
// The new graph is stored in staticGraph_, and valuesUnpacked_wdv is made to
// share valuesPacked_wdv. "Start"/"Done" diagnostics go to std::cerr.
//
// Note: "suffix" in this constructor begins with ". ", so a message that is
// followed by suffix must NOT end in a period itself. The first local-graph
// check previously ended in "correctly." and produced "correctly.. Please
// report ..."; its trailing period has been removed.
381 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
384 const Teuchos::RCP<const map_type>& colMap,
385 const typename local_graph_device_type::row_map_type& rowPointers,
386 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
387 const typename local_matrix_device_type::values_type& values,
388 const Teuchos::RCP<Teuchos::ParameterList>& params) :
390 storageStatus_ (Details::STORAGE_1D_PACKED)
392 using Details::getEntryOnHost;
395 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
396 "RCP<const Map>, ptr, ind, val[, params]): ";
397 const char suffix[] =
398 ". Please report this bug to the Tpetra developers.";
402 std::unique_ptr<std::string> prefix;
404 prefix = this->createPrefix(
405 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
406 std::ostringstream os;
407 os << *prefix <<
"Start" << endl;
408 std::cerr << os.str ();
// Check the user's input: values and columnIndices must have equal length.
415 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
416 (values.extent(0) != columnIndices.extent(0),
417 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
418 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
// Debug-only: the last row offset must equal the number of entries.
420 if (debug && rowPointers.extent(0) != 0) {
421 const size_t numEnt =
422 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
423 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
424 (numEnt !=
size_t(columnIndices.extent(0)) ||
425 numEnt !=
size_t(values.extent(0)),
426 std::invalid_argument,
"Last entry of rowPointers says that "
427 "the matrix has " << numEnt <<
" entr"
428 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
429 "columnIndices and values don't match this. "
430 "columnIndices.extent(0)=" << columnIndices.extent (0)
431 <<
" and values.extent(0)=" << values.extent (0) <<
".");
434 RCP<crs_graph_type> graph;
436 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
437 columnIndices, params));
439 catch (std::exception& e) {
440 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
441 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
442 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
// Verify that the graph constructor produced a local graph matching the input.
450 auto lclGraph = graph->getLocalGraphDevice ();
451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
452 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
453 lclGraph.entries.extent (0) != columnIndices.extent (0),
454 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
455 "ind[, params]) did not set the local graph correctly" << suffix);
456 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
457 (lclGraph.entries.extent (0) != values.extent (0),
458 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
459 "params]) did not set the local graph correctly. "
460 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
461 <<
" != values.extent(0) = " << values.extent (0) << suffix);
467 staticGraph_ = graph;
// Packed and unpacked values refer to the same storage.
477 valuesUnpacked_wdv = valuesPacked_wdv;
486 std::ostringstream os;
487 os << *prefix <<
"Done" << endl;
488 std::cerr << os.str();
// Constructor taking (per its tfecfFuncName) a row Map, a column Map, and
// the three CRS arrays as Teuchos::ArrayRCP: ptr (size_t), ind
// (LocalOrdinal), and val (Scalar). storageStatus_ is set to
// Details::STORAGE_1D_PACKED.
//
// A std::exception thrown by the CrsGraph constructor is reported as
// std::runtime_error including e.what(). After staticGraph_ is set, the
// local graph's row_map / entries extents are checked against ptr.size()
// and ind.size() (std::logic_error on mismatch). The values are obtained
// from val through getKokkosViewDeepCopy<device_type> after reinterpreting
// the entries as IST, and valuesUnpacked_wdv shares valuesPacked_wdv.
492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
495 const Teuchos::RCP<const map_type>& colMap,
496 const Teuchos::ArrayRCP<size_t>& ptr,
497 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
498 const Teuchos::ArrayRCP<Scalar>& val,
499 const Teuchos::RCP<Teuchos::ParameterList>& params) :
501 storageStatus_ (Details::STORAGE_1D_PACKED)
503 using Kokkos::Compat::getKokkosViewDeepCopy;
504 using Teuchos::av_reinterpret_cast;
506 using values_type =
typename local_matrix_device_type::values_type;
508 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
509 "RCP<const Map>, ptr, ind, val[, params]): ";
511 RCP<crs_graph_type> graph;
516 catch (std::exception& e) {
517 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
518 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
519 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
520 "RCP<ParameterList>]) threw an exception: " << e.what ());
526 staticGraph_ = graph;
// Verify that the graph constructor produced a local graph matching the input.
539 auto lclGraph = staticGraph_->getLocalGraphDevice ();
540 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
541 (
size_t (lclGraph.row_map.extent (0)) !=
size_t (ptr.size ()) ||
542 size_t (lclGraph.entries.extent (0)) !=
size_t (ind.size ()),
543 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
544 "ptr, ind[, params]) did not set the local graph correctly. "
545 "Please report this bug to the Tpetra developers.");
548 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
// Packed and unpacked values refer to the same storage.
550 valuesUnpacked_wdv = valuesPacked_wdv;
// Constructor taking (per its tfecfFuncName) a row Map, a column Map, and a
// local_matrix_device_type (lclMatrix). storageStatus_ is set to
// Details::STORAGE_1D_PACKED.
//
// The graph is built from lclMatrix.graph; a std::exception thrown by the
// CrsGraph constructor is reported as std::runtime_error including e.what().
// Throws std::logic_error if the resulting graph is not fill complete.
// The graph is stored in staticGraph_, valuesUnpacked_wdv shares
// valuesPacked_wdv, and two final checks report (per their messages) a
// matrix that is still fill-active or not fill-complete.
// NOTE(review): the conditions of the last two checks are not visible in
// this excerpt.
560 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
563 const Teuchos::RCP<const map_type>& colMap,
565 const Teuchos::RCP<Teuchos::ParameterList>& params) :
567 storageStatus_ (Details::STORAGE_1D_PACKED),
570 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
571 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
572 const char suffix[] =
573 " Please report this bug to the Tpetra developers.";
575 Teuchos::RCP<crs_graph_type> graph;
578 lclMatrix.graph, params));
580 catch (std::exception& e) {
581 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
582 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
583 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
584 "exception: " << e.what ());
586 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
587 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
588 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
589 "did not produce a fill-complete graph. Please report this bug to the "
590 "Tpetra developers.");
595 staticGraph_ = graph;
// Packed and unpacked values refer to the same storage.
598 valuesUnpacked_wdv = valuesPacked_wdv;
600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
602 "At the end of a CrsMatrix constructor that should produce "
603 "a fillComplete matrix, isFillActive() is true." << suffix);
604 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
606 "CrsMatrix constructor that should produce a fillComplete "
607 "matrix, isFillComplete() is false." << suffix);
// Constructor taking a local matrix (lclMatrix) together with explicit row,
// column, domain, and range Maps. storageStatus_ is set to
// Details::STORAGE_1D_PACKED.
//
// The graph is built from (lclMatrix.graph, rowMap, colMap, domainMap,
// rangeMap, params); a std::exception thrown by the CrsGraph constructor is
// reported as std::runtime_error including e.what(). Throws std::logic_error
// if the resulting graph is not fill complete. The graph is stored in
// staticGraph_, valuesUnpacked_wdv shares valuesPacked_wdv, and two final
// checks report (per their messages) a matrix that is still fill-active or
// not fill-complete.
// NOTE(review): the lclMatrix parameter declaration and the conditions of the
// last two checks are not visible in this excerpt.
611 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
614 const Teuchos::RCP<const map_type>& rowMap,
615 const Teuchos::RCP<const map_type>& colMap,
616 const Teuchos::RCP<const map_type>& domainMap,
617 const Teuchos::RCP<const map_type>& rangeMap,
618 const Teuchos::RCP<Teuchos::ParameterList>& params) :
620 storageStatus_ (Details::STORAGE_1D_PACKED),
623 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
624 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
625 "local_matrix_device_type[, RCP<ParameterList>]): ";
626 const char suffix[] =
627 " Please report this bug to the Tpetra developers.";
629 Teuchos::RCP<crs_graph_type> graph;
631 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
632 domainMap, rangeMap, params));
634 catch (std::exception& e) {
635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
636 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
637 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
638 "RCP<ParameterList>]) threw an exception: " << e.what ());
640 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
641 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
642 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
643 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
644 "not produce a fillComplete graph." << suffix);
649 staticGraph_ = graph;
// Packed and unpacked values refer to the same storage.
652 valuesUnpacked_wdv = valuesPacked_wdv;
654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
656 "At the end of a CrsMatrix constructor that should produce "
657 "a fillComplete matrix, isFillActive() is true." << suffix);
658 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
660 "CrsMatrix constructor that should produce a fillComplete "
661 "matrix, isFillComplete() is false." << suffix);
// Constructor taking (per its tfecfFuncName) a local matrix, four Maps (row,
// column, domain, range), an Import, an Export, and a parameter list.
// storageStatus_ is set to Details::STORAGE_1D_PACKED.
//
// A std::exception thrown by the CrsGraph constructor is reported as
// std::runtime_error including e.what(). Throws std::logic_error if the
// resulting graph is not fill complete. The graph is stored in staticGraph_,
// valuesUnpacked_wdv shares valuesPacked_wdv, and two final checks report
// (per their messages) a matrix that is still fill-active or not
// fill-complete.
// NOTE(review): most of the graph construction call and the conditions of
// the last two checks are not visible in this excerpt.
665 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
668 const Teuchos::RCP<const map_type>& rowMap,
669 const Teuchos::RCP<const map_type>& colMap,
670 const Teuchos::RCP<const map_type>& domainMap,
671 const Teuchos::RCP<const map_type>& rangeMap,
672 const Teuchos::RCP<const import_type>& importer,
673 const Teuchos::RCP<const export_type>& exporter,
674 const Teuchos::RCP<Teuchos::ParameterList>& params) :
676 storageStatus_ (Details::STORAGE_1D_PACKED),
680 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
681 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
682 const char suffix[] =
683 " Please report this bug to the Tpetra developers.";
685 Teuchos::RCP<crs_graph_type> graph;
688 domainMap, rangeMap, importer,
691 catch (std::exception& e) {
692 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
693 (
true, std::runtime_error,
"CrsGraph constructor "
694 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
695 "params) threw: " << e.what ());
697 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
698 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
699 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
700 "Export, params) did not produce a fill-complete graph. "
701 "Please report this bug to the Tpetra developers.");
706 staticGraph_ = graph;
// Packed and unpacked values refer to the same storage.
709 valuesUnpacked_wdv = valuesPacked_wdv;
711 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
713 "At the end of a CrsMatrix constructor that should produce "
714 "a fillComplete matrix, isFillActive() is true." << suffix);
715 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
717 "CrsMatrix constructor that should produce a fillComplete "
718 "matrix, isFillComplete() is false." << suffix);
// Constructor taking (per its tfecfFuncName) another CrsMatrix ("source") and
// a Teuchos::DataAccess flag. staticGraph_ is taken from
// source.getCrsGraph() and storageStatus_ from source.storageStatus_.
//
// Behavior by copyOrView:
//  - Teuchos::Copy: a new values_type labeled "val" is allocated without
//    initialization, and valuesUnpacked_wdv shares valuesPacked_wdv;
//  - Teuchos::View: handled by a separate branch (body not visible here);
//  - any other value: std::invalid_argument.
// The first check's message indicates that the source graph must be
// fillComplete(); its condition is not visible in this excerpt.
722 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
725 const Teuchos::DataAccess copyOrView):
727 staticGraph_ (source.getCrsGraph()),
728 storageStatus_ (source.storageStatus_)
730 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
731 "const CrsMatrix&, const Teuchos::DataAccess): ";
732 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
734 "Source graph must be fillComplete().");
736 if (copyOrView == Teuchos::Copy) {
737 using values_type =
typename local_matrix_device_type::values_type;
739 using Kokkos::view_alloc;
740 using Kokkos::WithoutInitializing;
741 values_type newvals (view_alloc (
"val", WithoutInitializing),
746 valuesUnpacked_wdv = valuesPacked_wdv;
749 else if (copyOrView == Teuchos::View) {
// Neither Copy nor View: reject the argument.
755 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
756 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
757 "has an invalid value " << copyOrView <<
". Valid values "
758 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
759 "Teuchos::View = " << Teuchos::View <<
".");
// Exchanges state between this matrix and crs_matrix member by member using
// std::swap. Visible here: importMV_, exportMV_, staticGraph_, myGraph_,
// valuesPacked_wdv, valuesUnpacked_wdv, and nonlocals_.
// NOTE(review): the signature and some swapped members are not visible in
// this excerpt.
764 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
769 std::swap(crs_matrix.
importMV_, this->importMV_);
770 std::swap(crs_matrix.
exportMV_, this->exportMV_);
771 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
772 std::swap(crs_matrix.myGraph_, this->myGraph_);
773 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
774 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
777 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
// Returns the communicator (RCP<const Teuchos::Comm<int>>) of the underlying
// graph, i.e. getCrsGraphRef().getComm().
780 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
781 Teuchos::RCP<const Teuchos::Comm<int> >
784 return getCrsGraphRef ().getComm ();
// Returns the value of the fillComplete_ flag.
787 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
791 return fillComplete_;
// Returns the negation of the fillComplete_ flag.
794 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
798 return ! fillComplete_;
// Forwards to the underlying graph: returns
// getCrsGraphRef().isStorageOptimized().
801 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
805 return this->getCrsGraphRef ().isStorageOptimized ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().isLocallyIndexed().
808 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
812 return getCrsGraphRef ().isLocallyIndexed ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().isGloballyIndexed().
815 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
819 return getCrsGraphRef ().isGloballyIndexed ();
// Forwards to the underlying graph: returns getCrsGraphRef().hasColMap().
822 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
826 return getCrsGraphRef ().hasColMap ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getGlobalNumEntries().
829 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
833 return getCrsGraphRef ().getGlobalNumEntries ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getLocalNumEntries().
836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
840 return getCrsGraphRef ().getLocalNumEntries ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getGlobalNumRows().
843 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
847 return getCrsGraphRef ().getGlobalNumRows ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getGlobalNumCols().
850 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
854 return getCrsGraphRef ().getGlobalNumCols ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getLocalNumRows().
857 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
861 return getCrsGraphRef ().getLocalNumRows ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getLocalNumCols().
865 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
869 return getCrsGraphRef ().getLocalNumCols ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getNumEntriesInGlobalRow(globalRow).
873 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
877 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
// Forwards to the underlying graph: returns
// getCrsGraphRef().getNumEntriesInLocalRow(localRow).
880 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
884 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
// Forwards to the underlying graph: returns
// getCrsGraphRef().getGlobalMaxNumRowEntries().
887 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
891 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
// Forwards to the underlying graph: returns
// getCrsGraphRef().getLocalMaxNumRowEntries().
894 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
898 return getCrsGraphRef ().getLocalMaxNumRowEntries ();
// Returns the index base of the row Map: getRowMap()->getIndexBase().
901 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
905 return getRowMap ()->getIndexBase ();
// Returns the row Map (RCP<const Map>) of the underlying graph:
// getCrsGraphRef().getRowMap().
908 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
909 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
912 return getCrsGraphRef ().getRowMap ();
// Returns the column Map (RCP<const Map>) of the underlying graph:
// getCrsGraphRef().getColMap().
915 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
916 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
919 return getCrsGraphRef ().getColMap ();
// Returns the domain Map (RCP<const Map>) of the underlying graph:
// getCrsGraphRef().getDomainMap().
922 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
923 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
926 return getCrsGraphRef ().getDomainMap ();
// Returns the range Map (RCP<const Map>) of the underlying graph:
// getCrsGraphRef().getRangeMap().
929 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
930 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
933 return getCrsGraphRef ().getRangeMap ();
// Returns the matrix's graph as an RCP<const RowGraph>. The visible branch
// tests whether staticGraph_ is non-null.
// NOTE(review): both return statements are not visible in this excerpt.
936 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
937 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
940 if (staticGraph_ != Teuchos::null) {
// Returns the matrix's graph as an RCP<const CrsGraph>. The visible branch
// tests whether staticGraph_ is non-null.
// NOTE(review): both return statements are not visible in this excerpt.
946 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
947 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
950 if (staticGraph_ != Teuchos::null) {
// Returns a reference to the matrix's graph: *staticGraph_ when staticGraph_
// is non-null, otherwise *myGraph_. Before dereferencing myGraph_, a check
// throws std::logic_error if myGraph_ is null as well.
// "debug" is a compile-time constant selected by HAVE_TPETRA_DEBUG.
// NOTE(review): how "debug" gates the null check is not visible in this
// excerpt.
956 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
961 #ifdef HAVE_TPETRA_DEBUG
962 constexpr
bool debug =
true;
964 constexpr
bool debug =
false;
965 #endif // HAVE_TPETRA_DEBUG
967 if (! this->staticGraph_.is_null ()) {
968 return * (this->staticGraph_);
972 const char tfecfFuncName[] =
"getCrsGraphRef: ";
973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
974 (this->myGraph_.is_null (), std::logic_error,
975 "Both staticGraph_ and myGraph_ are null. "
976 "Please report this bug to the Tpetra developers.");
978 return * (this->myGraph_);
// Returns a local_matrix_device_type. The visible arguments are the device
// view of valuesPacked_wdv (Access::ReadWrite) and staticGraph_'s device
// local graph; numCols is the number of local elements in the graph's
// column Map.
// NOTE(review): the line that constructs the returned object is not visible
// in this excerpt.
982 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
983 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
987 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
990 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
991 staticGraph_->getLocalGraphDevice());
// Returns a local_matrix_host_type labeled "Tpetra::CrsMatrix::lclMatrixHost"
// built from numCols (the number of local elements in staticGraph_'s column
// Map), the host view of valuesPacked_wdv (Access::ReadWrite), and
// staticGraph_'s host local graph.
994 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
995 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
999 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
1000 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
1001 valuesPacked_wdv.getHostView(Access::ReadWrite),
1002 staticGraph_->getLocalGraphHost());
// Returns true exactly when myGraph_ is null.
1005 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1009 return myGraph_.is_null ();
1012 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// allocateValues: sets up the matrix's value storage to match the graph.
//
// Inputs used in the visible code:
//   lg      - LocalIndices or GlobalIndices; forwarded to
//             CrsGraph::allocateIndices and printed in diagnostics.
//   gas     - GraphAlreadyAllocated or GraphNotYetAllocated; the caller's
//             claim about whether the graph's indices are allocated.
//   verbose - forwarded to CrsGraph::allocateIndices.
//
// Steps:
//   1. Throw std::logic_error if staticGraph_ is null.
//   2. If the caller's claim (gas) disagrees with
//      staticGraph_->indicesAreAllocated(), throw std::logic_error with a
//      message stating which way the disagreement goes.
//   3. Throw std::logic_error if the indices are not allocated and the matrix
//      does not own the graph (myGraph_ is null).
//   4. If gas == GraphNotYetAllocated, call myGraph_->allocateIndices; any
//      exception from that call is reported as std::runtime_error.
//   5. Check that the last unpacked row offset equals the graph's local
//      allocation size, then construct a values_type labeled
//      "Tpetra::CrsMatrix::values" with lclTotalNumEntries entries.
// NOTE(review): the signature, the guards around the diagnostic output and
// debug-only checks, and the assignment target of the new values_type are
// not visible in this excerpt.
//
// Fix: the step-5 error message misspelled "rowPtrsUnpacked_host" as
// "rowPtrsUnapcked_host".
1026 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1035 const char tfecfFuncName[] =
"allocateValues: ";
1036 const char suffix[] =
1037 " Please report this bug to the Tpetra developers.";
1038 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
1040 std::unique_ptr<std::string> prefix;
1042 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1043 std::ostringstream os;
1044 os << *prefix <<
"lg: "
1045 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1047 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1048 <<
"Allocated" << endl;
1049 std::cerr << os.str();
1052 const bool debug = Behavior::debug(
"CrsMatrix");
1054 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1055 (this->staticGraph_.is_null (), std::logic_error,
1056 "staticGraph_ is null." << suffix);
// The caller's claim about allocation must agree with the graph's state.
1061 if ((gas == GraphAlreadyAllocated) !=
1062 staticGraph_->indicesAreAllocated ()) {
1063 const char err1[] =
"The caller has asserted that the graph "
1065 const char err2[] =
"already allocated, but the static graph "
1066 "says that its indices are ";
1067 const char err3[] =
"already allocated. ";
1068 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1069 (gas == GraphAlreadyAllocated &&
1070 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1071 err1 << err2 <<
"not " << err3 << suffix);
1072 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1073 (gas != GraphAlreadyAllocated &&
1074 staticGraph_->indicesAreAllocated (), std::logic_error,
1075 err1 <<
"not " << err2 << err3 << suffix);
// Unallocated indices are only acceptable when the matrix owns the graph.
1083 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1084 (! this->staticGraph_->indicesAreAllocated () &&
1085 this->myGraph_.is_null (), std::logic_error,
1086 "The static graph says that its indices are not allocated, "
1087 "but the graph is not owned by the matrix." << suffix);
1090 if (gas == GraphNotYetAllocated) {
1092 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1093 (this->myGraph_.is_null (), std::logic_error,
1094 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1097 this->myGraph_->allocateIndices (lg, verbose);
1099 catch (std::exception& e) {
1100 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1101 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1102 "threw an exception: " << e.what ());
1105 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1106 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1107 "threw an exception not a subclass of std::exception.");
// The values array is sized by the graph's local allocation size.
1112 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
1114 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
1115 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1116 (this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1117 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnpacked_host." << suffix);
1121 using values_type =
typename local_matrix_device_type::values_type;
1123 std::ostringstream os;
1124 os << *prefix <<
"Allocate values_wdv: Pre "
1125 << valuesUnpacked_wdv.extent(0) <<
", post "
1126 << lclTotalNumEntries << endl;
1127 std::cerr << os.str();
1131 values_type(
"Tpetra::CrsMatrix::values",
1132 lclTotalNumEntries));
// fillLocalGraphAndMatrix: per its own error-message prefix, this is called
// from fillComplete or expertStaticFillComplete. It requires the matrix to
// own a nonconst graph (myGraph_). When the graph's local entry count differs
// from its local allocation size, new packed row offsets, local column
// indices, and values are allocated and filled; otherwise the packed members
// are set to share the unpacked data. If optimized storage is requested, the
// unpacked members are then pointed at the packed data, k_numRowEntries_ is
// released, and the storage status becomes STORAGE_1D_PACKED.
1136 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1142 using ::Tpetra::Details::getEntryOnHost;
1143 using Teuchos::arcp_const_cast;
1144 using Teuchos::Array;
1145 using Teuchos::ArrayRCP;
1146 using Teuchos::null;
1150 using row_map_type =
typename local_graph_device_type::row_map_type;
1151 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1152 using values_type =
typename local_matrix_device_type::values_type;
1154 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
1156 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1157 "fillComplete or expertStaticFillComplete): ";
1158 const char suffix[] =
1159 " Please report this bug to the Tpetra developers.";
// Prefix string for diagnostic output written to std::cerr.
1163 std::unique_ptr<std::string> prefix;
1165 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1166 std::ostringstream os;
1167 os << *prefix << endl;
1168 std::cerr << os.str ();
// A null myGraph_ means the matrix has a const ("static") graph; this
// method must never be called in that case.
1174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1175 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1176 "(myGraph_) is null. This means that the matrix has a "
1177 "const (a.k.a. \"static\") graph. fillComplete or "
1178 "expertStaticFillComplete should never call "
1179 "fillLocalGraphAndMatrix in that case." << suffix);
1182 const size_t lclNumRows = this->getLocalNumRows ();
// Current (unpacked) device row offsets of the graph.
1197 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1198 myGraph_->rowPtrsUnpacked_dev_;
// Consistency checks: the offsets must be nonempty, have lclNumRows + 1
// entries, and their last entry must equal the length of the unpacked
// local index storage.
1201 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1202 (curRowOffsets.extent (0) == 0, std::logic_error,
1203 "curRowOffsets.extent(0) == 0.");
1204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1205 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1206 "curRowOffsets.extent(0) = "
1207 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1208 << (lclNumRows + 1) <<
".");
1209 const size_t numOffsets = curRowOffsets.extent (0);
1210 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1211 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1213 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1214 std::logic_error,
"numOffsets = " <<
1215 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1216 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1217 << numOffsets <<
") = " << valToCheck <<
".");
// The number of entries differs from the allocation size, so the 1-D
// storage is not yet packed.
1220 if (myGraph_->getLocalNumEntries() !=
1221 myGraph_->getLocalAllocationSize()) {
1225 typename row_map_type::non_const_type k_ptrs;
1226 row_map_type k_ptrs_const;
1227 lclinds_1d_type k_inds;
1231 std::ostringstream os;
1232 const auto numEnt = myGraph_->getLocalNumEntries();
1233 const auto allocSize = myGraph_->getLocalAllocationSize();
1234 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1235 <<
", allocSize=" << allocSize << endl;
1236 std::cerr << os.str ();
// Debug check before packing: the last unpacked row offset must equal both
// the unpacked values length and the unpacked local index length.
1244 if (debug && curRowOffsets.extent (0) != 0) {
1245 const size_t numOffsets =
1246 static_cast<size_t> (curRowOffsets.extent (0));
1247 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1248 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1249 (static_cast<size_t> (valToCheck) !=
1250 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1251 std::logic_error,
"(unpacked branch) Before "
1252 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1253 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1254 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1255 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1256 (static_cast<size_t> (valToCheck) !=
1257 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1258 std::logic_error,
"(unpacked branch) Before "
1259 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1260 <<
") = " << valToCheck
1261 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1262 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
1270 size_t lclTotalNumEntries = 0;
1276 std::ostringstream os;
1277 os << *prefix <<
"Allocate packed row offsets: "
1278 << (lclNumRows+1) << endl;
1279 std::cerr << os.str ();
// Packed row offsets: one entry per local row, plus one.
1281 typename row_map_type::non_const_type
1282 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
// Per-row entry counts kept by the graph.
1283 typename row_entries_type::const_type numRowEnt_h =
1284 myGraph_->k_numRowEntries_;
1287 lclTotalNumEntries =
1291 k_ptrs = packedRowOffsets;
1292 k_ptrs_const = k_ptrs;
// After filling k_ptrs: it must have lclNumRows + 1 entries, and its last
// entry must equal the total number of local entries.
1296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1297 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1299 "(unpacked branch) After packing k_ptrs, "
1300 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1301 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1302 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1303 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1304 (valToCheck != lclTotalNumEntries, std::logic_error,
1305 "(unpacked branch) After filling k_ptrs, "
1306 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1307 <<
" != total number of entries on the calling process = "
1308 << lclTotalNumEntries <<
".");
1313 std::ostringstream os;
1314 os << *prefix <<
"Allocate packed local column indices: "
1315 << lclTotalNumEntries << endl;
1316 std::cerr << os.str ();
// Allocate packed storage sized to the actual number of entries.
1318 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1320 std::ostringstream os;
1321 os << *prefix <<
"Allocate packed values: "
1322 << lclTotalNumEntries << endl;
1323 std::cerr << os.str ();
1325 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
// Pack the local column indices, one parallel iteration per local row.
// pack_functor is defined elsewhere; presumably it copies each row's
// entries from the unpacked layout (curRowOffsets) to the packed layout
// (k_ptrs) -- TODO confirm.
1337 using inds_packer_type = pack_functor<
1338 typename Graph::local_graph_device_type::entries_type::non_const_type,
1339 typename Graph::local_inds_dualv_type::t_dev::const_type,
1340 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1341 typename Graph::local_graph_device_type::row_map_type>;
1342 inds_packer_type indsPacker (
1344 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1345 k_ptrs, curRowOffsets);
1347 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1348 Kokkos::parallel_for
1349 (
"Tpetra::CrsMatrix pack column indices",
1350 range_type (0, lclNumRows), indsPacker);
// Pack the values the same way, using the same two sets of row offsets.
1354 using vals_packer_type = pack_functor<
1355 typename values_type::non_const_type,
1356 typename values_type::const_type,
1357 typename row_map_type::non_const_type,
1358 typename row_map_type::const_type>;
1359 vals_packer_type valsPacker (
1361 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1362 k_ptrs, curRowOffsets);
1363 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1364 range_type (0, lclNumRows), valsPacker);
// After packing: the last packed row offset must equal the lengths of both
// k_vals and k_inds.
1367 const char myPrefix[] =
"(\"Optimize Storage\""
1368 "=true branch) After packing, ";
1369 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1370 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1371 <<
"k_ptrs.extent(0) = 0. This probably means that "
1372 "rowPtrsUnpacked_ was never allocated.");
1373 if (k_ptrs.extent (0) != 0) {
1374 const size_t numOffsets (k_ptrs.extent (0));
1375 const auto valToCheck =
1376 getEntryOnHost (k_ptrs, numOffsets - 1);
1377 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1378 (
size_t (valToCheck) != k_vals.extent (0),
1379 std::logic_error, myPrefix <<
1380 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1381 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1382 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1383 (
size_t (valToCheck) != k_inds.extent (0),
1384 std::logic_error, myPrefix <<
1385 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1386 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
// Hand the packed row offsets to the graph.
1390 myGraph_->setRowPtrsPacked(k_ptrs_const);
1391 myGraph_->lclIndsPacked_wdv =
// Storage is already packed: the packed members share the unpacked data.
1398 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1399 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1400 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1401 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1402 valuesPacked_wdv = valuesUnpacked_wdv;
1405 std::ostringstream os;
1406 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1407 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1408 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1409 << valuesUnpacked_wdv.extent(0) << endl;
1410 std::cerr << os.str();
// Check that the last unpacked row offset matches the lengths of the
// packed values and packed local indices.
// NOTE(review): the messages below label the checked value "k_ptrs_const",
// but it is read from rowPtrsUnpackedHost -- confirm the intended wording.
1414 const char myPrefix[] =
1415 "(\"Optimize Storage\"=false branch) ";
1416 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1417 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1418 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1419 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1420 "that rowPtrsUnpacked_ was never allocated.");
1421 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1422 const size_t numOffsets = rowPtrsUnpackedHost.extent (0);
1423 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
1424 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1425 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1426 std::logic_error, myPrefix <<
1427 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1428 <<
" != valuesPacked_wdv.extent(0) = "
1429 << valuesPacked_wdv.extent (0) <<
".");
1430 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1431 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1432 std::logic_error, myPrefix <<
1433 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1434 <<
" != myGraph_->lclIndsPacked.extent(0) = "
1435 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Final checks on the packed host row offsets: lclNumRows + 1 entries, with
// the last entry equal to the lengths of the packed values and indices.
// NOTE(review): "lclIndsPacked_wdvk_inds" in the last message below looks
// like a leftover from an edit (lclIndsPacked_wdv vs. k_inds) -- confirm.
1441 const char myPrefix[] =
"After packing, ";
1442 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1443 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1444 (
size_t (rowPtrsPackedHost.extent (0)) !=
size_t (lclNumRows + 1),
1445 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1446 << rowPtrsPackedHost.extent (0) <<
" != lclNumRows+1 = " <<
1447 (lclNumRows+1) <<
".");
1448 if (rowPtrsPackedHost.extent (0) != 0) {
1449 const size_t numOffsets (rowPtrsPackedHost.extent (0));
1450 const size_t valToCheck = rowPtrsPackedHost(numOffsets-1);
1451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1452 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1453 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1454 (numOffsets-1) <<
") = " << valToCheck
1455 <<
" != valuesPacked_wdv.extent(0) = "
1456 << valuesPacked_wdv.extent (0) <<
".");
1457 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1458 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1459 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1460 (numOffsets-1) <<
") = " << valToCheck
1461 <<
" != myGraph_->lclIndsPacked_wdvk_inds.extent(0) = "
1462 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Default for "Optimize Storage": true unless the graph is static and its
// storage is not optimized.
1470 const bool defaultOptStorage =
1471 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
// Use the ParameterList's "Optimize Storage" entry when params is non-null;
// otherwise fall back to the default computed above.
1472 const bool requestOptimizedStorage =
1473 (! params.is_null () &&
1474 params->get (
"Optimize Storage", defaultOptStorage)) ||
1475 (params.is_null () && defaultOptStorage);
1480 if (requestOptimizedStorage) {
1485 std::ostringstream os;
1486 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1487 << myGraph_->k_numRowEntries_.extent(0) << endl;
1488 std::cerr << os.str();
// Release the per-row entry counts by assigning a default-constructed
// row_entries_type.
1491 myGraph_->k_numRowEntries_ = row_entries_type ();
// From here on the unpacked members refer to the packed data.
1496 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1497 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1498 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1499 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1500 valuesUnpacked_wdv = valuesPacked_wdv;
// Record packed 1-D storage on both the graph and the matrix.
1502 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1503 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1507 std::ostringstream os;
1508 os << *prefix <<
"User requested NOT to optimize storage"
1510 std::cerr << os.str();
// fillLocalMatrix: matrix-values counterpart of packing when the graph is
// taken from staticGraph_. If the graph's local entry count differs from its
// allocation size, a packed values array is allocated and filled from
// valuesUnpacked_wdv; otherwise valuesPacked_wdv shares valuesUnpacked_wdv.
// When optimized storage is requested, valuesUnpacked_wdv is pointed at the
// packed values and the storage status becomes STORAGE_1D_PACKED.
1515 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1520 using ::Tpetra::Details::ProfilingRegion;
1521 using Teuchos::ArrayRCP;
1522 using Teuchos::Array;
1523 using Teuchos::null;
1527 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1528 using non_const_row_map_type =
typename row_map_type::non_const_type;
1529 using values_type =
typename local_matrix_device_type::values_type;
1530 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1531 const size_t lclNumRows = getLocalNumRows();
// Prefix string for diagnostic output written to std::cerr.
1534 std::unique_ptr<std::string> prefix;
1536 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1537 std::ostringstream os;
1538 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1539 std::cerr << os.str ();
// Entry count, allocation size, and packed row offsets all come from the
// (possibly const) graph.
1551 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1552 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1553 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1555 row_map_type k_ptrs;
// Optimized storage is requested unless the ParameterList's
// "Optimize Storage" entry evaluates to false. The entry's default is true
// unless the graph is static and its storage is not optimized.
1561 bool requestOptimizedStorage =
true;
1562 const bool default_OptimizeStorage =
1563 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1564 if (! params.is_null() &&
1565 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1566 requestOptimizedStorage =
false;
// The graph's storage is not optimized, so the matrix cannot optimize its
// own storage either; the request is cleared below.
1573 if (! staticGraph_->isStorageOptimized () &&
1574 requestOptimizedStorage) {
1576 (
true, std::runtime_error,
"You requested optimized storage "
1577 "by setting the \"Optimize Storage\" flag to \"true\" in "
1578 "the ParameterList, or by virtue of default behavior. "
1579 "However, the associated CrsGraph was filled separately and "
1580 "requested not to optimize storage. Therefore, the "
1581 "CrsMatrix cannot optimize storage.");
1582 requestOptimizedStorage =
false;
// The number of entries differs from the allocation size, so the values
// are not yet packed.
1607 if (nodeNumEntries != nodeNumAllocated) {
1609 std::ostringstream os;
1610 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1611 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1613 std::cerr << os.str();
1618 std::ostringstream os;
1619 os << *prefix <<
"Allocate packed row offsets: "
1620 << (lclNumRows+1) << endl;
1621 std::cerr << os.str();
// Temporary packed row offsets used only to pack the values.
1623 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
1628 size_t lclTotalNumEntries = 0;
// Per-row entry counts kept by the graph.
1631 typename row_entries_type::const_type numRowEnt_h =
1632 staticGraph_->k_numRowEntries_;
1634 lclTotalNumEntries =
1641 std::ostringstream os;
1642 os << *prefix <<
"Allocate packed values: "
1643 << lclTotalNumEntries << endl;
1644 std::cerr << os.str ();
// Packed values sized to the actual number of entries.
1646 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
// Functor arguments: destination values, source (unpacked) values,
// destination row offsets, source row offsets.
1650 typename values_type::non_const_type,
1651 typename values_type::const_type,
1652 typename row_map_type::non_const_type,
1653 typename row_map_type::const_type> valsPacker
1654 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1655 tmpk_ptrs, k_rowPtrs);
// One parallel iteration per local row.
1658 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1659 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1660 range_type (0, lclNumRows), valsPacker);
// Values are already packed: the packed member shares the unpacked data.
1664 valuesPacked_wdv = valuesUnpacked_wdv;
1666 std::ostringstream os;
1667 os << *prefix <<
"Storage already packed: "
1668 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1669 std::cerr << os.str();
// Optimized storage: the unpacked values now refer to the packed values.
1674 if (requestOptimizedStorage) {
1677 valuesUnpacked_wdv = valuesPacked_wdv;
1679 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Inserts new column indices into one graph row via graph.insertIndices and
// stores the matching values after the row's existing entries.
//   newInds    : indices to insert, forwarded to graph.insertIndices.
//   oldRowVals : the row's current values; written starting at the row's old
//                entry count.
//   newRowVals : values to copy; the first numInserted of them are used.
//   lg, I      : index-kind selectors forwarded to graph.insertIndices.
1683 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1688 const typename crs_graph_type::SLocalGlobalViews& newInds,
1689 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1690 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1691 const ELocalGlobal lg,
1692 const ELocalGlobal I)
// Remember the entry count before the graph row is modified.
1694 const size_t oldNumEnt = rowInfo.numEntries;
1695 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
// Copy the new values directly behind the old ones. The guard also avoids
// indexing element 0 of newRowVals when nothing was inserted.
// NOTE(review): the raw memcpy assumes impl_scalar_type is trivially
// copyable -- confirm.
1701 if (numInserted > 0) {
1702 const size_t startOffset = oldNumEnt;
1703 memcpy ((
void*) &oldRowVals[startOffset], &newRowVals[0],
1704 numInserted *
sizeof (impl_scalar_type));
// insertLocalValues (ArrayView overload): inserts values at the given local
// column indices of a local row.
// Throws std::runtime_error if fill is not active, the graph is static, the
// graph has no column Map, the graph's indices are global (per the message
// below), values and indices differ in length, or lclRow is not a local row
// of the row Map. Throws std::invalid_argument for a CombineMode other than
// the ones handled below, and (in debug builds) when a column index is not
// accepted by the column-Map check.
1708 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1712 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1713 const Teuchos::ArrayView<const Scalar>& values,
1717 const char tfecfFuncName[] =
"insertLocalValues: ";
// Precondition checks.
1719 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1720 (! this->isFillActive (), std::runtime_error,
1721 "Fill is not active. After calling fillComplete, you must call "
1722 "resumeFill before you may insert entries into the matrix again.");
1723 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1724 (this->isStaticGraph (), std::runtime_error,
1725 "Cannot insert indices with static graph; use replaceLocalValues() "
// Local indices only have meaning relative to a column Map.
1729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1730 (graph.
colMap_.is_null (), std::runtime_error,
1731 "Cannot insert local indices without a column map.");
1732 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1734 std::runtime_error,
"Graph indices are global; use "
1735 "insertGlobalValues().");
1736 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1737 (values.size () != indices.size (), std::runtime_error,
1738 "values.size() = " << values.size ()
1739 <<
" != indices.size() = " << indices.size () <<
".");
1740 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1741 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1742 "Local row index " << lclRow <<
" does not belong to this process.");
// First insertion: allocate storage for local indices and values.
1744 if (! graph.indicesAreAllocated ()) {
1748 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
// Debug builds only: collect the offending column indices and report them
// all in a single exception.
1751 #ifdef HAVE_TPETRA_DEBUG
1752 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1757 using Teuchos::toString;
1760 Teuchos::Array<LocalOrdinal> badColInds;
1761 bool allInColMap =
true;
1762 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1764 allInColMap =
false;
1765 badColInds.push_back (indices[k]);
1768 if (! allInColMap) {
1769 std::ostringstream os;
1770 os <<
"You attempted to insert entries in owned row " << lclRow
1771 <<
", at the following column indices: " << toString (indices)
1773 os <<
"Of those, the following indices are not in the column Map on "
1774 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1775 "the matrix has a column Map already, it is invalid to insert "
1776 "entries at those locations.";
1777 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1778 (
true, std::invalid_argument, os.str ());
1781 #endif // HAVE_TPETRA_DEBUG
// Host view of the row's values, written through by the callbacks below.
1785 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// Callback handed to graph.insertLocalIndicesImpl: adds input value k to
// the stored value at position offset.
1787 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1788 valsView[offset] += values[k]; };
1789 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1790 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1791 }
else if (CM ==
INSERT) {
// INSERT mode: the callback overwrites the stored value instead of adding.
1792 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1793 valsView[offset] = values[k]; };
1794 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1795 graph.insertLocalIndicesImpl(lclRow, indices, cb);
// Any other CombineMode is rejected.
// NOTE(review): the message below has no space between the combine mode
// name and "but" -- confirm and fix the wording.
1797 std::ostringstream os;
1798 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1799 <<
"but this has not been implemented." << endl;
1800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1801 (
true, std::invalid_argument, os.str ());
// insertLocalValues (raw-array overload): wraps the first numEnt elements of
// cols and vals in non-owning ArrayViews and forwards to the ArrayView
// overload with the same row and CombineMode.
1805 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1809 const LocalOrdinal numEnt,
1810 const Scalar vals[],
1811 const LocalOrdinal cols[],
// The views do not copy; cols and vals must stay valid for the call.
1814 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1815 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1816 this->insertLocalValues (localRow, colsT, valsT, CM);
// insertGlobalValuesImpl: inserts numInputEnt entries, given by global column
// index, into one row of the graph and matrix. Storage is allocated on first
// use. Debug builds afterwards verify the row's entry count and, on a
// mismatch, throw std::logic_error with a detailed dump of the inputs and of
// the row's current contents.
1819 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1824 const GlobalOrdinal gblColInds[],
1826 const size_t numInputEnt)
1828 #ifdef HAVE_TPETRA_DEBUG
1829 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
1831 const size_t curNumEnt = rowInfo.numEntries;
1832 #endif // HAVE_TPETRA_DEBUG
// First insertion: allocate storage for global indices and values.
1834 if (! graph.indicesAreAllocated ()) {
1837 using ::Tpetra::Details::Behavior;
1838 const bool verbose = Behavior::verbose(
"CrsMatrix");
1839 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
// Re-read the row info after allocation.
1844 rowInfo = graph.
getRowInfo (rowInfo.localRow);
// Callback that adds input value k to the stored value at position offset.
1847 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1848 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1849 valsView[offset] += vals[k];
1851 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1852 #ifdef HAVE_TPETRA_DEBUG
// Debug builds only: the expected entry count (count before insertion plus
// the number inserted) must match what the graph reports.
1858 #ifdef HAVE_TPETRA_DEBUG
1859 size_t newNumEnt = curNumEnt + numInserted;
1860 const size_t chkNewNumEnt =
1862 if (chkNewNumEnt != newNumEnt) {
1863 std::ostringstream os;
1864 os << std::endl <<
"newNumEnt = " << newNumEnt
1865 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1866 <<
") = " << chkNewNumEnt <<
"." << std::endl
1867 <<
"\torigNumEnt: " << origNumEnt << std::endl
1868 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1869 <<
"\tgblColInds: [";
// Dump the input arrays.
1870 for (
size_t k = 0; k < numInputEnt; ++k) {
1871 os << gblColInds[k];
1872 if (k +
size_t (1) < numInputEnt) {
1876 os <<
"]" << std::endl
1878 for (
size_t k = 0; k < numInputEnt; ++k) {
1880 if (k +
size_t (1) < numInputEnt) {
1884 os <<
"]" << std::endl;
// If row views are available, also dump the row's current indices and
// values, using global or local indices according to the matrix state.
1886 if (this->supportsRowViews ()) {
1887 values_host_view_type vals2;
1888 if (this->isGloballyIndexed ()) {
1889 global_inds_host_view_type gblColInds2;
1890 const GlobalOrdinal gblRow =
1891 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1893 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1894 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
// Getting the view may itself throw; record that in the message rather
// than letting it replace the error being reported.
1898 bool getViewThrew =
false;
1900 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1902 catch (std::exception& e) {
1903 getViewThrew =
true;
1904 os <<
"getGlobalRowView threw exception:" << std::endl
1905 << e.what () << std::endl;
1907 if (! getViewThrew) {
1908 os <<
"\tNew global column indices: ";
1909 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1910 os << gblColInds2[jjj] <<
" ";
1912 os <<
"\tNew values: ";
1913 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1914 os << vals2[jjj] <<
" ";
1919 else if (this->isLocallyIndexed ()) {
1920 local_inds_host_view_type lclColInds2;
1921 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1922 os <<
"\tNew local column indices: ";
1923 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1924 os << lclColInds2[jjj] <<
" ";
1926 os <<
"\tNew values: ";
1927 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1928 os << vals2[jjj] <<
" ";
// An entry-count mismatch is an internal error, not a user error.
1933 os <<
"Please report this bug to the Tpetra developers.";
1934 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1935 (
true, std::logic_error, os.str ());
1937 #endif // HAVE_TPETRA_DEBUG
// insertGlobalValues (ArrayView overload): inserts values at the given global
// column indices of a global row.
// A row whose global index has no local counterpart in the row Map is passed
// to insertNonownedGlobalValues. For an owned row, std::runtime_error is
// thrown if the graph is static, and std::invalid_argument if the graph has
// a column Map and the column-Map check below rejects an input index.
// Otherwise the entries go to insertGlobalValuesImpl.
1940 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1944 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1945 const Teuchos::ArrayView<const Scalar>& values)
1947 using Teuchos::toString;
1950 typedef LocalOrdinal LO;
1951 typedef GlobalOrdinal GO;
1952 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
1953 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
1954 const char tfecfFuncName[] =
"insertGlobalValues: ";
// The length check is compiled only in debug builds.
1956 #ifdef HAVE_TPETRA_DEBUG
1957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1958 (values.size () != indices.size (), std::runtime_error,
1959 "values.size() = " << values.size () <<
" != indices.size() = "
1960 << indices.size () <<
".");
1961 #endif // HAVE_TPETRA_DEBUG
1965 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
// An invalid local row index means the row Map does not contain gblRow on
// this process.
1968 if (lclRow == OTLO::invalid ()) {
1975 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row with a static graph: new entries may not be inserted.
1978 if (this->isStaticGraph ()) {
1980 const int myRank = rowMap.getComm ()->getRank ();
1981 const int numProcs = rowMap.getComm ()->getSize ();
1982 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1983 (
true, std::runtime_error,
1984 "The matrix was constructed with a constant (\"static\") graph, "
1985 "yet the given global row index " << gblRow <<
" is in the row "
1986 "Map on the calling process (with rank " << myRank <<
", of " <<
1987 numProcs <<
" process(es)). In this case, you may not insert "
1988 "new entries into rows owned by the calling process.");
// Raw pointers for the implementation routine; values are reinterpreted
// from Scalar to IST.
1992 const IST*
const inputVals =
1993 reinterpret_cast<const IST*
> (values.getRawPtr ());
1994 const GO*
const inputGblColInds = indices.getRawPtr ();
1995 const size_t numInputEnt = indices.size ();
// With a column Map present, the input column indices are checked against
// it. The list of offending indices is only collected in debug builds.
2004 if (! graph.
colMap_.is_null ()) {
2010 #ifdef HAVE_TPETRA_DEBUG
2011 Teuchos::Array<GO> badColInds;
2012 #endif // HAVE_TPETRA_DEBUG
2013 const size_type numEntriesToInsert = indices.size ();
2014 bool allInColMap =
true;
2015 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2017 allInColMap =
false;
2018 #ifdef HAVE_TPETRA_DEBUG
2019 badColInds.push_back (indices[k]);
2022 #endif // HAVE_TPETRA_DEBUG
// Build the error message; it is more detailed in debug builds.
2025 if (! allInColMap) {
2026 std::ostringstream os;
2027 os <<
"You attempted to insert entries in owned row " << gblRow
2028 <<
", at the following column indices: " << toString (indices)
2030 #ifdef HAVE_TPETRA_DEBUG
2031 os <<
"Of those, the following indices are not in the column Map "
2032 "on this process: " << toString (badColInds) <<
"." << endl
2033 <<
"Since the matrix has a column Map already, it is invalid "
2034 "to insert entries at those locations.";
2036 os <<
"At least one of those indices is not in the column Map "
2037 "on this process." << endl <<
"It is invalid to insert into "
2038 "columns not in the column Map on the process that owns the "
2040 #endif // HAVE_TPETRA_DEBUG
2041 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2042 (
true, std::invalid_argument, os.str ());
// All checks passed: insert into the owned row.
2046 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2047 inputVals, numInputEnt);
// insertGlobalValues (raw-array overload): wraps the first numEnt elements of
// inds and vals in non-owning ArrayViews and forwards to the ArrayView
// overload for the same global row.
2052 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2056 const LocalOrdinal numEnt,
2057 const Scalar vals[],
2058 const GlobalOrdinal inds[])
// The views do not copy; inds and vals must stay valid for the call.
2060 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2061 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2062 this->insertGlobalValues (globalRow, indsT, valsT);
// insertGlobalValuesFiltered: like insertGlobalValues, but when the graph has
// a column Map, input columns that are not in it are skipped instead of being
// treated as an error. The input is processed as consecutive runs of columns
// that are in the column Map; each run is inserted on its own.
// Throws std::runtime_error if values and indices differ in length, or if the
// row is owned and the graph is static. A row that is not in the row Map on
// this process is passed to insertNonownedGlobalValues.
2066 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2070 const GlobalOrdinal gblRow,
2071 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2072 const Teuchos::ArrayView<const Scalar>& values,
2075 typedef impl_scalar_type IST;
2076 typedef LocalOrdinal LO;
2077 typedef GlobalOrdinal GO;
2078 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2079 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
2082 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2083 (values.size () != indices.size (), std::runtime_error,
2084 "values.size() = " << values.size () <<
" != indices.size() = "
2085 << indices.size () <<
".");
2090 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2091 const LO lclRow = rowMap.getLocalElement (gblRow);
// An invalid local row index means the row Map does not contain gblRow on
// this process.
2092 if (lclRow == OTLO::invalid ()) {
2099 this->insertNonownedGlobalValues (gblRow, indices, values);
// Owned row with a static graph: new entries may not be inserted.
2102 if (this->isStaticGraph ()) {
2104 const int myRank = rowMap.getComm ()->getRank ();
2105 const int numProcs = rowMap.getComm ()->getSize ();
2106 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2107 (
true, std::runtime_error,
2108 "The matrix was constructed with a constant (\"static\") graph, "
2109 "yet the given global row index " << gblRow <<
" is in the row "
2110 "Map on the calling process (with rank " << myRank <<
", of " <<
2111 numProcs <<
" process(es)). In this case, you may not insert "
2112 "new entries into rows owned by the calling process.");
2115 crs_graph_type& graph = * (this->myGraph_);
// Raw pointers for the implementation routine; values are reinterpreted
// from Scalar to IST.
2116 const IST*
const inputVals =
2117 reinterpret_cast<const IST*
> (values.getRawPtr ());
2118 const GO*
const inputGblColInds = indices.getRawPtr ();
2119 const size_t numInputEnt = indices.size ();
2120 RowInfo rowInfo = graph.getRowInfo (lclRow);
// Case 1: column Map present and graph locally indexed. Translate each run
// of in-Map global columns to local indices and insert via
// insertLocalValues.
2122 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2129 const map_type& colMap = * (graph.colMap_);
2130 size_t curOffset = 0;
2131 while (curOffset < numInputEnt) {
// Local indices of the current run, starting at curOffset.
2135 Teuchos::Array<LO> lclIndices;
2136 size_t endOffset = curOffset;
2137 for ( ; endOffset < numInputEnt; ++endOffset) {
2138 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2139 if (lclIndex != OTLO::invalid())
2140 lclIndices.push_back(lclIndex);
// Insert the run together with the matching slice of the input values.
2147 const LO numIndInSeq = (endOffset - curOffset);
2148 if (numIndInSeq != 0) {
2149 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
// The run must have ended either at the end of the input or at a column
// that is not in the column Map.
2155 const bool invariant = endOffset == numInputEnt ||
2156 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2157 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2158 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
// Step over the column that ended the run.
2160 curOffset = endOffset + 1;
// Case 2: column Map present but graph not locally indexed. Same run
// detection, but each run is inserted with global indices.
2163 else if (! graph.colMap_.is_null ()) {
2164 const map_type& colMap = * (graph.colMap_);
2165 size_t curOffset = 0;
2166 while (curOffset < numInputEnt) {
2170 size_t endOffset = curOffset;
2171 for ( ; endOffset < numInputEnt &&
2172 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2178 const LO numIndInSeq = (endOffset - curOffset);
2179 if (numIndInSeq != 0) {
// Refresh the row info before each insertion into the row.
2180 rowInfo = graph.getRowInfo(lclRow);
2181 this->insertGlobalValuesImpl (graph, rowInfo,
2182 inputGblColInds + curOffset,
2183 inputVals + curOffset,
2190 const bool invariant = endOffset == numInputEnt ||
2191 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2192 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2193 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
// Step over the column that ended the run.
2195 curOffset = endOffset + 1;
// Case 3: no column Map, so nothing to filter; insert all input entries.
2199 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2200 inputVals, numInputEnt);
// insertGlobalValuesFilteredChecked: calls insertGlobalValuesFiltered and, if
// it throws a std::exception, rethrows as std::runtime_error with a message
// that includes the original what() text (plus the global row index in the
// more detailed branch).
2205 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2207 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2208 insertGlobalValuesFilteredChecked(
2209 const GlobalOrdinal gblRow,
2210 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2211 const Teuchos::ArrayView<const Scalar>& values,
2212 const char*
const prefix,
2220 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2222 catch(std::exception& e) {
2223 std::ostringstream os;
2225 const size_t maxNumToPrint =
// NOTE(review): prefix is declared `const char* const`, so `*prefix`
// streams only its first character rather than the whole string --
// confirm whether `prefix` (without the dereference) was intended.
2227 os << *prefix <<
": insertGlobalValuesFiltered threw an "
2228 "exception: " << e.what() << endl
2229 <<
"Global row index: " << gblRow << endl;
2237 os <<
": insertGlobalValuesFiltered threw an exception: "
// Rethrow unconditionally with the assembled message.
2240 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// Replaces values in one row for the numElts input local column indices in
// inds. Each index is looked up in the row with KokkosSparse::findRelOffset;
// when it is found, the stored value at that offset is overwritten with the
// matching element of newVals. Two lookup loops are visible: one searches by
// local column index, the other by a global column index and requires the
// graph to have a column Map.
2244 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2250 const LocalOrdinal inds[],
2252 const LocalOrdinal numElts)
2254 typedef LocalOrdinal LO;
2255 typedef GlobalOrdinal GO;
// Passed to findRelOffset along with the search hint.
2256 const bool sorted = graph.
isSorted ();
2266 for (LO j = 0; j < numElts; ++j) {
2267 const LO lclColInd = inds[j];
2268 const size_t offset =
2269 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2270 lclColInd, hint, sorted);
// An offset equal to the row's entry count means "not found"; such input
// entries are skipped.
2271 if (offset != rowInfo.numEntries) {
2272 rowVals[offset] = newVals[j];
// Without a column Map this path cannot proceed; signal failure with the
// invalid LO value.
2279 if (graph.
colMap_.is_null ()) {
2280 return Teuchos::OrdinalTraits<LO>::invalid ();
2288 for (LO j = 0; j < numElts; ++j) {
// Only input entries with a valid global column index are looked up.
2290 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2291 const size_t offset =
2292 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2293 gblColInd, hint, sorted);
2294 if (offset != rowInfo.numEntries) {
2295 rowVals[offset] = newVals[j];
// replaceLocalValues (ArrayView overload): returns the invalid LO value when
// lclCols and vals differ in length; otherwise forwards the raw pointers and
// the entry count to the raw-array overload and returns its result.
2314 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2318 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2319 const Teuchos::ArrayView<const Scalar>& vals)
2321 typedef LocalOrdinal LO;
// Sizes are compared after conversion to LO.
2323 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
2324 if (static_cast<LO> (vals.size ()) != numInputEnt) {
2325 return Teuchos::OrdinalTraits<LO>::invalid ();
2327 const LO*
const inputInds = lclCols.getRawPtr ();
2328 const Scalar*
const inputVals = vals.getRawPtr ();
2329 return this->replaceLocalValues (localRow, numInputEnt,
2330 inputVals, inputInds);
// replaceLocalValues (Kokkos::View overload): returns the invalid LO value
// when inputInds and inputVals differ in extent; otherwise forwards the
// views' data pointers and the entry count to the raw-array overload.
2333 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2339 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2340 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
// The extent is narrowed to LO before the comparison.
2343 const LO numInputEnt = inputInds.extent(0);
2344 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2345 return Teuchos::OrdinalTraits<LO>::invalid();
// The raw-array overload takes Scalar, so the impl_scalar_type data is
// reinterpreted.
2347 const Scalar*
const inVals =
2348 reinterpret_cast<const Scalar*
>(inputVals.data());
2349 return this->replaceLocalValues(localRow, numInputEnt,
2350 inVals, inputInds.data());
// Raw-pointer overload (presumably replaceLocalValues; the name line is not
// visible here).
// numEnt: number of input entries; inputVals: values; inputCols: local
// column indices.
// Returns OrdinalTraits<LO>::invalid() when fill is not active or the matrix
// has no graph, 0 when the row lookup yields an invalid local row, and
// otherwise the result of replaceLocalValuesImpl on the current row values.
2353 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2357 const LocalOrdinal numEnt,
2358 const Scalar inputVals[],
2359 const LocalOrdinal inputCols[])
2362 typedef LocalOrdinal LO;
2364 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2366 return Teuchos::OrdinalTraits<LO>::invalid ();
// An invalid localRow in rowInfo means there is nothing to modify here.
2371 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2374 return static_cast<LO
> (0);
2376 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The Impl routine works on IST, so the Scalar input is reinterpreted.
2377 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2378 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2379 inputCols, inVals, numEnt);
// Overwrites stored values of one row, given global column indices `inds`
// and their count `numElts`.
// NOTE(review): the function-name line, remaining parameters and the call
// that consumes `cb` are not visible in this excerpt; presumably this is
// replaceGlobalValuesImpl -- confirm.
// Visible behavior: wraps the input indices in an ArrayView and builds a
// callback that assigns newVals[k] to rowVals[offset]; its second argument
// is unused.
2382 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2388 const GlobalOrdinal inds[],
2390 const LocalOrdinal numElts)
2392 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
2394 [&](
size_t const k,
size_t const ,
size_t const offset) {
2395 rowVals[offset] = newVals[k];
// std::ref keeps the std::function referring to `fun` instead of copying it.
2397 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// Convenience overload taking Teuchos::ArrayView arguments (presumably
// replaceGlobalValues, judging by the call it forwards to -- the name line is
// not visible here).
// inputGblColInds: global column indices; inputVals: values.
// Returns OrdinalTraits<LO>::invalid() when the two views differ in size;
// otherwise forwards raw pointers to the pointer-based overload.
2401 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2405 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2406 const Teuchos::ArrayView<const Scalar>& inputVals)
2408 typedef LocalOrdinal LO;
2410 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
// Mismatched index/value counts are rejected before touching the matrix.
2411 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2412 return Teuchos::OrdinalTraits<LO>::invalid ();
2414 return this->replaceGlobalValues (globalRow, numInputEnt,
2415 inputVals.getRawPtr (),
2416 inputGblColInds.getRawPtr ());
// Raw-pointer overload (presumably replaceGlobalValues; the name line is not
// visible here).
// numEnt: number of input entries; inputVals: values; inputGblColInds:
// global column indices.
// Returns OrdinalTraits<LO>::invalid() when fill is not active or the matrix
// has no graph, 0 when the row lookup yields an invalid local row, and
// otherwise the result of replaceGlobalValuesImpl on the current row values.
2419 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2423 const LocalOrdinal numEnt,
2424 const Scalar inputVals[],
2425 const GlobalOrdinal inputGblColInds[])
2428 typedef LocalOrdinal LO;
2430 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2432 return Teuchos::OrdinalTraits<LO>::invalid ();
// An invalid localRow in rowInfo means there is nothing to modify here.
2437 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2440 return static_cast<LO
> (0);
2443 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The Impl routine works on IST, so the Scalar input is reinterpreted.
2444 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2445 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2446 inputGblColInds, inVals, numEnt);
// Convenience overload taking Kokkos::View arguments (presumably
// replaceGlobalValues, judging by the call it forwards to -- the name line is
// not visible here).
// inputInds: global column indices; inputVals: values stored as
// impl_scalar_type.
// Returns OrdinalTraits<LO>::invalid() when the views differ in extent;
// otherwise forwards to the pointer-based overload (the final argument of
// that call is not visible in this excerpt).
2449 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2455 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2456 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2465 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
2466 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2467 return Teuchos::OrdinalTraits<LO>::invalid();
// The pointer-based overload takes Scalar, so the impl_scalar_type data is
// reinterpreted rather than copied.
2469 const Scalar*
const inVals =
2470 reinterpret_cast<const Scalar*
>(inputVals.data());
2471 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
// Adds input values into existing entries of one row, given global column
// indices `inds` and their count `numElts`.
// NOTE(review): the function-name line, remaining parameters and the branch
// conditions are not visible in this excerpt; presumably this is
// sumIntoGlobalValuesImpl and the atomic/non-atomic choice is driven by an
// `atomic` flag -- confirm.
// Visible behavior: each index is located with KokkosSparse::findRelOffset;
// when found (offset != rowInfo.numEntries) newVals[j] is accumulated into
// rowVals[offset], either through Kokkos::atomic_add or a plain +=.
2475 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2481 const GlobalOrdinal inds[],
2483 const LocalOrdinal numElts,
2486 typedef LocalOrdinal LO;
2487 typedef GlobalOrdinal GO;
2489 const bool sorted = graph.
isSorted ();
// The first path checks for a column Map; what happens when it is null is
// not visible in this excerpt.
2498 if (graph.
colMap_.is_null ()) {
2509 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
// Search by lclColInd (its derivation is not visible here), skipping
// indices equal to LINV.
2511 for (LO j = 0; j < numElts; ++j) {
2513 if (lclColInd != LINV) {
2514 const size_t offset =
2515 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2516 lclColInd, hint, sorted);
2517 if (offset != rowInfo.numEntries) {
2519 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2522 rowVals[offset] += newVals[j];
// Second path: the input global indices are searched for directly.
2535 for (LO j = 0; j < numElts; ++j) {
2536 const GO gblColInd = inds[j];
2537 const size_t offset =
2538 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2539 gblColInd, hint, sorted);
2540 if (offset != rowInfo.numEntries) {
2542 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2545 rowVals[offset] += newVals[j];
// Convenience overload taking Teuchos::ArrayView arguments (presumably
// sumIntoGlobalValues, judging by the call it forwards to -- the name line is
// not visible here).
// inputGblColInds: global column indices; inputVals: values to add.
// Returns OrdinalTraits<LO>::invalid() when the two views differ in size;
// otherwise forwards raw pointers to the pointer-based overload (the final
// argument of that call is not visible in this excerpt).
2559 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2563 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2564 const Teuchos::ArrayView<const Scalar>& inputVals,
2567 typedef LocalOrdinal LO;
2569 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
// Mismatched index/value counts are rejected before touching the matrix.
2570 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2571 return Teuchos::OrdinalTraits<LO>::invalid ();
2573 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2574 inputVals.getRawPtr (),
2575 inputGblColInds.getRawPtr (),
// Raw-pointer overload (presumably sumIntoGlobalValues; the name line is not
// visible here).
// numInputEnt: number of input entries; inputVals: values to add;
// inputGblColInds: global column indices.
// Returns OrdinalTraits<LO>::invalid() when fill is not active or the matrix
// has no graph. When the row lookup yields an invalid local row, the input
// is handed to insertNonownedGlobalValues (the return value of that branch
// is not visible in this excerpt). Otherwise returns the result of
// sumIntoGlobalValuesImpl on the current row values.
2579 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2583 const LocalOrdinal numInputEnt,
2584 const Scalar inputVals[],
2585 const GlobalOrdinal inputGblColInds[],
2589 typedef LocalOrdinal LO;
2590 typedef GlobalOrdinal GO;
2592 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2594 return Teuchos::OrdinalTraits<LO>::invalid ();
2599 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2604 using Teuchos::ArrayView;
// With zero entries the views are built from nullptr instead of the input
// pointers.
2605 ArrayView<const GO> inputGblColInds_av(
2606 numInputEnt == 0 ?
nullptr : inputGblColInds,
2608 ArrayView<const Scalar> inputVals_av(
2609 numInputEnt == 0 ?
nullptr :
2610 inputVals, numInputEnt);
2615 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2626 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The Impl routine works on IST, so the Scalar input is reinterpreted.
2627 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2628 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2629 inputGblColInds, inVals,
2630 numInputEnt, atomic);
// Row-level entry point that applies a binary function to existing entries
// of a local row (presumably transformLocalValues, judging by the call it
// forwards to -- the name line is not visible here).
// numInputEnt: number of input entries; inputVals: input values;
// inputCols: local column indices; f: binary function combining values.
// Returns OrdinalTraits<LO>::invalid() when fill is not active or the matrix
// has no graph, 0 when getRowInfo reports an invalid local row, and
// otherwise the result of the storage-level transformLocalValues overload.
2634 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2638 const LocalOrdinal numInputEnt,
2639 const impl_scalar_type inputVals[],
2640 const LocalOrdinal inputCols[],
2641 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2644 using Tpetra::Details::OrdinalTraits;
2645 typedef LocalOrdinal LO;
2647 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2649 return Teuchos::OrdinalTraits<LO>::invalid ();
2651 const crs_graph_type& graph = * (this->staticGraph_);
2652 const RowInfo rowInfo = graph.getRowInfo (lclRow);
// An invalid localRow in rowInfo means there is nothing to modify here.
2654 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2657 return static_cast<LO
> (0);
2659 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2660 return this->transformLocalValues (curRowVals.data (), graph,
2661 rowInfo, inputCols, inputVals,
2662 numInputEnt, f, atomic);
// Row-level entry point that applies a binary function to existing entries
// of the row with global index gblRow.
// numInputEnt: number of input entries; inputVals: input values;
// inputCols: global column indices; f: binary function combining values.
// (The return-type line and the last parameter are not visible in this
// excerpt; `atomic` is forwarded below.)
// Returns OrdinalTraits<LO>::invalid() when fill is not active or the matrix
// has no graph, 0 when the row lookup reports an invalid local row, and
// otherwise the result of the storage-level transformGlobalValues overload.
2665 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2667 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2668 transformGlobalValues (
const GlobalOrdinal gblRow,
2669 const LocalOrdinal numInputEnt,
2670 const impl_scalar_type inputVals[],
2671 const GlobalOrdinal inputCols[],
2672 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2675 using Tpetra::Details::OrdinalTraits;
2676 typedef LocalOrdinal LO;
2678 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2680 return OrdinalTraits<LO>::invalid ();
2682 const crs_graph_type& graph = * (this->staticGraph_);
2683 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
// An invalid localRow in rowInfo means there is nothing to modify here.
2685 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2688 return static_cast<LO
> (0);
2690 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2691 return this->transformGlobalValues (curRowVals.data (), graph,
2692 rowInfo, inputCols, inputVals,
2693 numInputEnt, f, atomic);
// Storage-level transform: for each input local column index that exists in
// the row, replaces the stored value with f(stored, newVals[j]).
// rowVals: the row values to update in place; graph/rowInfo: describe the
// row; inds: local column indices; newVals: input values; numElts: count;
// f: binary combining function.
// (The return-type line, the last parameter, the branch conditions choosing
// the atomic path, and the return statements are not visible in this
// excerpt.)
2696 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2698 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2699 transformLocalValues (impl_scalar_type rowVals[],
2700 const crs_graph_type& graph,
2701 const RowInfo& rowInfo,
2702 const LocalOrdinal inds[],
2703 const impl_scalar_type newVals[],
2704 const LocalOrdinal numElts,
2705 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2708 typedef impl_scalar_type ST;
2709 typedef LocalOrdinal LO;
2710 typedef GlobalOrdinal GO;
2717 const bool sorted = graph.isSorted ();
// Locally indexed graph: the input indices can be searched for directly.
2722 if (graph.isLocallyIndexed ()) {
2725 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2727 for (LO j = 0; j < numElts; ++j) {
2728 const LO lclColInd = inds[j];
2729 const size_t offset =
2730 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2731 lclColInd, hint, sorted);
// offset == rowInfo.numEntries signals that the index is not in the row.
2732 if (offset != rowInfo.numEntries) {
2741 ST*
const dest = &rowVals[offset];
2742 (void) atomic_binary_function_update (dest, newVals[j], f);
2746 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Globally indexed graph: input local indices are first converted to global
// indices through the column Map.
2753 else if (graph.isGloballyIndexed ()) {
2757 if (graph.colMap_.is_null ()) {
2764 const map_type& colMap = * (graph.colMap_);
2767 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2769 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2770 for (LO j = 0; j < numElts; ++j) {
2771 const GO gblColInd = colMap.getGlobalElement (inds[j]);
// Indices the column Map cannot convert are skipped.
2772 if (gblColInd != GINV) {
2773 const size_t offset =
2774 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2775 gblColInd, hint, sorted);
2776 if (offset != rowInfo.numEntries) {
2785 ST*
const dest = &rowVals[offset];
2786 (void) atomic_binary_function_update (dest, newVals[j], f);
2790 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Storage-level transform: for each input global column index that exists
// in the row, replaces the stored value with f(stored, newVals[j]).
// rowVals: the row values to update in place; graph/rowInfo: describe the
// row; inds: global column indices; newVals: input values; numElts: count;
// f: binary combining function.
// (The return-type line, the last parameter, the branch conditions choosing
// the atomic path, and the return statements are not visible in this
// excerpt.)
2805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2807 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2808 transformGlobalValues (impl_scalar_type rowVals[],
2809 const crs_graph_type& graph,
2810 const RowInfo& rowInfo,
2811 const GlobalOrdinal inds[],
2812 const impl_scalar_type newVals[],
2813 const LocalOrdinal numElts,
2814 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2817 typedef impl_scalar_type ST;
2818 typedef LocalOrdinal LO;
2819 typedef GlobalOrdinal GO;
2826 const bool sorted = graph.isSorted ();
// Globally indexed graph: the input indices can be searched for directly.
2831 if (graph.isGloballyIndexed ()) {
2834 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2836 for (LO j = 0; j < numElts; ++j) {
2837 const GO gblColInd = inds[j];
2838 const size_t offset =
2839 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2840 gblColInd, hint, sorted);
// offset == rowInfo.numEntries signals that the index is not in the row.
2841 if (offset != rowInfo.numEntries) {
2850 ST*
const dest = &rowVals[offset];
2851 (void) atomic_binary_function_update (dest, newVals[j], f);
2855 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Locally indexed graph: input global indices are first converted to local
// indices through the column Map.
2862 else if (graph.isLocallyIndexed ()) {
2866 if (graph.colMap_.is_null ()) {
2872 const map_type& colMap = * (graph.colMap_);
2875 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2877 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2878 for (LO j = 0; j < numElts; ++j) {
2879 const LO lclColInd = colMap.getLocalElement (inds[j]);
// Indices the column Map cannot convert are skipped.
2880 if (lclColInd != LINV) {
2881 const size_t offset =
2882 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2883 lclColInd, hint, sorted);
2884 if (offset != rowInfo.numEntries) {
2893 ST*
const dest = &rowVals[offset];
2894 (void) atomic_binary_function_update (dest, newVals[j], f);
2898 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Adds input values into existing entries of one row, given local column
// indices `inds` and their count `numElts`.
// NOTE(review): the function-name line, remaining parameters and the branch
// conditions are not visible in this excerpt; presumably this is
// sumIntoLocalValuesImpl and the atomic/non-atomic choice is driven by an
// `atomic` flag -- confirm.
// Visible behavior: each index is located with KokkosSparse::findRelOffset;
// when found (offset != rowInfo.numEntries) newVals[j] is accumulated into
// rowVals[offset], either through Kokkos::atomic_add or a plain +=.
// Returns OrdinalTraits<LO>::invalid() when the second path finds that the
// graph has no column Map.
2913 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2919 const LocalOrdinal inds[],
2921 const LocalOrdinal numElts,
2924 typedef LocalOrdinal LO;
2925 typedef GlobalOrdinal GO;
2927 const bool sorted = graph.
isSorted ();
// Path 1: search the row directly for each input local column index.
2937 for (LO j = 0; j < numElts; ++j) {
2938 const LO lclColInd = inds[j];
2939 const size_t offset =
2940 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2941 lclColInd, hint, sorted);
2942 if (offset != rowInfo.numEntries) {
2944 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2947 rowVals[offset] += newVals[j];
// Path 2 depends on the column Map; without one, report failure.
2955 if (graph.
colMap_.is_null ()) {
2956 return Teuchos::OrdinalTraits<LO>::invalid ();
// Path 2: search by gblColInd (its derivation is not visible in this
// excerpt), skipping indices equal to OrdinalTraits<GO>::invalid().
2964 for (LO j = 0; j < numElts; ++j) {
2966 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2967 const size_t offset =
2968 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2969 gblColInd, hint, sorted);
2970 if (offset != rowInfo.numEntries) {
2972 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2975 rowVals[offset] += newVals[j];
// Convenience overload taking Teuchos::ArrayView arguments (presumably
// sumIntoLocalValues, judging by the call it forwards to -- the name line is
// not visible here).
// indices: local column indices; values: values to add.
// Returns OrdinalTraits<LO>::invalid() when the two views differ in size;
// otherwise forwards raw pointers and `atomic` to the pointer-based
// overload.
2995 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2999 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3000 const Teuchos::ArrayView<const Scalar>& values,
3004 const LO numInputEnt =
static_cast<LO
>(indices.size());
// Mismatched index/value counts are rejected before touching the matrix.
3005 if (static_cast<LO>(values.size()) != numInputEnt) {
3006 return Teuchos::OrdinalTraits<LO>::invalid();
3008 const LO*
const inputInds = indices.getRawPtr();
3009 const scalar_type*
const inputVals = values.getRawPtr();
3010 return this->sumIntoLocalValues(localRow, numInputEnt,
3011 inputVals, inputInds, atomic);
// Convenience overload taking Kokkos::View arguments (presumably
// sumIntoLocalValues, judging by the call it forwards to -- the name line is
// not visible here).
// inputInds: local column indices; inputVals: values stored as
// impl_scalar_type.
// Returns OrdinalTraits<LO>::invalid() when the views differ in extent;
// otherwise forwards the raw data pointers and `atomic` to the pointer-based
// overload. The values pointer is reinterpreted as scalar_type (the
// declaration line of inVals is not visible in this excerpt).
3014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3020 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3021 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3025 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
3026 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3027 return Teuchos::OrdinalTraits<LO>::invalid();
3030 reinterpret_cast<const scalar_type*
>(inputVals.data());
3031 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3032 inputInds.data(), atomic);
// Raw-pointer overload (presumably sumIntoLocalValues; the name line is not
// visible here).
// numEnt: number of input entries; vals: values to add; cols: local column
// indices.
// Returns OrdinalTraits<LO>::invalid() when fill is not active or the matrix
// has no graph, 0 when the row lookup yields an invalid local row, and
// otherwise the result of sumIntoLocalValuesImpl on the current row values.
3035 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3039 const LocalOrdinal numEnt,
3040 const Scalar vals[],
3041 const LocalOrdinal cols[],
3045 typedef LocalOrdinal LO;
3047 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3049 return Teuchos::OrdinalTraits<LO>::invalid ();
// An invalid localRow in rowInfo means there is nothing to modify here.
3054 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3057 return static_cast<LO
> (0);
3059 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
// The Impl routine works on IST, so the Scalar input is reinterpreted.
3060 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3061 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3062 cols, inputVals, numEnt, atomic);
// Returns a read-only host view of the values of one row (presumably
// getValuesViewHost; the name line is not visible here).
// Yields a default-constructed (empty) view when the row has no allocated
// space or the unpacked values storage is empty; otherwise a host subview
// of valuesUnpacked_wdv starting at rowinfo.offset1D (the length argument
// is not visible in this excerpt).
3065 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3067 values_dualv_type::t_host::const_type
3071 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3072 return typename values_dualv_type::t_host::const_type ();
3074 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a writable host view of the values of one row (presumably
// getValuesViewHostNonConst; the name line is not visible here).
// Yields a default-constructed (empty) view when the row has no allocated
// space or the unpacked values storage is empty; otherwise a host subview
// of valuesUnpacked_wdv starting at rowinfo.offset1D (the remaining
// arguments are not visible in this excerpt).
3079 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3081 values_dualv_type::t_host
3085 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3086 return typename values_dualv_type::t_host ();
3088 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a read-only device view of the values of one row (presumably
// getValuesViewDevice; the name line is not visible here).
// Yields a default-constructed (empty) view when the row has no allocated
// space or the unpacked values storage is empty; otherwise a device subview
// of valuesUnpacked_wdv starting at rowinfo.offset1D (the remaining
// arguments are not visible in this excerpt).
3093 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3095 values_dualv_type::t_dev::const_type
3099 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3100 return typename values_dualv_type::t_dev::const_type ();
3102 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Returns a writable device view of the values of one row (presumably
// getValuesViewDeviceNonConst; the name line is not visible here).
// Yields a default-constructed (empty) view when the row has no allocated
// space or the unpacked values storage is empty; otherwise a device subview
// of valuesUnpacked_wdv starting at rowinfo.offset1D (the remaining
// arguments are not visible in this excerpt).
3107 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3109 values_dualv_type::t_dev
3113 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3114 return typename values_dualv_type::t_dev ();
3116 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// getLocalRowCopy: copies the entries of a local row into caller-provided
// host views.
// indices: output local column indices; values: output values;
// numEntries: set to the number of entries in the row.
// Throws std::runtime_error when the matrix has no column Map, or when
// either output view is smaller than the number of entries in the row.
// Note that numEntries is assigned before the validity of the row is
// checked, so it is set even if nothing is copied.
3122 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3126 nonconst_local_inds_host_view_type &indices,
3127 nonconst_values_host_view_type &values,
3128 size_t& numEntries)
const
3130 using Teuchos::ArrayView;
3131 using Teuchos::av_reinterpret_cast;
3132 const char tfecfFuncName[] =
"getLocalRowCopy: ";
3134 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3135 (! this->hasColMap (), std::runtime_error,
3136 "The matrix does not have a column Map yet. This means we don't have "
3137 "local indices for columns yet, so it doesn't make sense to call this "
3138 "method. If the matrix doesn't have a column Map yet, you should call "
3139 "fillComplete on it first.");
3141 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3142 const size_t theNumEntries = rowinfo.numEntries;
3143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3144 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3145 static_cast<size_t> (values.size ()) < theNumEntries,
3146 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3147 theNumEntries <<
" entry/ies, but indices.size() = " <<
3148 indices.size () <<
" and values.size() = " << values.size () <<
".");
3149 numEntries = theNumEntries;
// Only rows that are valid on this process have anything to copy.
3151 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed storage: indices are copied as stored.
3152 if (staticGraph_->isLocallyIndexed ()) {
3153 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3154 auto curVals = getValuesViewHost(rowinfo);
3156 for (
size_t j = 0; j < theNumEntries; ++j) {
3157 values[j] = curVals[j];
3158 indices[j] = curLclInds(j);
// Globally indexed storage: the column Map is fetched here, presumably to
// convert stored global indices to local ones (that line is not visible in
// this excerpt).
3161 else if (staticGraph_->isGloballyIndexed ()) {
3163 const map_type& colMap = * (staticGraph_->colMap_);
3164 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3165 auto curVals = getValuesViewHost(rowinfo);
3167 for (
size_t j = 0; j < theNumEntries; ++j) {
3168 values[j] = curVals[j];
// getGlobalRowCopy: copies the entries of the row with global index
// globalRow into caller-provided host views.
// indices: output global column indices; values: output values;
// numEntries: set to the number of entries in the row.
// Throws std::runtime_error when either output view is smaller than the
// number of entries in the row.
// Note that numEntries is assigned before the validity of the row is
// checked, so it is set even if nothing is copied.
3176 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3180 nonconst_global_inds_host_view_type &indices,
3181 nonconst_values_host_view_type &values,
3182 size_t& numEntries)
const
3184 using Teuchos::ArrayView;
3185 using Teuchos::av_reinterpret_cast;
3186 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
3189 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3190 const size_t theNumEntries = rowinfo.numEntries;
3191 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3192 static_cast<size_t> (indices.size ()) < theNumEntries ||
3193 static_cast<size_t> (values.size ()) < theNumEntries,
3194 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3195 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3196 indices.size () <<
" and values.size() = " << values.size () <<
".");
3197 numEntries = theNumEntries;
// Only rows that are valid on this process have anything to copy.
3199 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed storage: the column Map is fetched here, presumably to
// convert stored local indices to global ones (that line is not visible in
// this excerpt).
3200 if (staticGraph_->isLocallyIndexed ()) {
3201 const map_type& colMap = * (staticGraph_->colMap_);
3202 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3203 auto curVals = getValuesViewHost(rowinfo);
3205 for (
size_t j = 0; j < theNumEntries; ++j) {
3206 values[j] = curVals[j];
// Globally indexed storage: indices are copied as stored.
3210 else if (staticGraph_->isGloballyIndexed ()) {
3211 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3212 auto curVals = getValuesViewHost(rowinfo);
3214 for (
size_t j = 0; j < theNumEntries; ++j) {
3215 values[j] = curVals[j];
3216 indices[j] = curGblInds(j);
// getLocalRowView: sets `indices` and `values` to host views of the local
// column indices and values of a local row, without copying.
// Throws std::runtime_error when the matrix is globally indexed.
// When the row is invalid on this process or has no entries, both outputs
// are set to default-constructed (empty) views.
// In HAVE_TPETRA_DEBUG builds, additional consistency checks throw
// std::logic_error when the view sizes disagree with each other or with the
// row entry count.
// NOTE(review): the error text below offers a view with global indices but
// names getGlobalRowCopy(); the message looks inconsistent -- confirm the
// intended method name.
3223 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3227 local_inds_host_view_type &indices,
3228 values_host_view_type &values)
const
3230 const char tfecfFuncName[] =
"getLocalRowView: ";
3232 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3233 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3234 "its indices as global indices, so you cannot get a view with local "
3235 "column indices. If the matrix has a column Map, you may call "
3236 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3237 "a view with global column indices by calling getGlobalRowCopy().");
3239 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
3240 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3241 rowInfo.numEntries > 0) {
3242 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3246 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3253 indices = local_inds_host_view_type();
3254 values = values_host_view_type();
3257 #ifdef HAVE_TPETRA_DEBUG
3258 const char suffix[] =
". This should never happen. Please report this "
3259 "bug to the Tpetra developers.";
3260 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3261 (static_cast<size_t> (indices.size ()) !=
3262 static_cast<size_t> (values.size ()), std::logic_error,
3263 "At the end of this method, for local row " << localRow <<
", "
3264 "indices.size() = " << indices.size () <<
" != values.size () = "
3265 << values.size () << suffix);
3266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3267 (static_cast<size_t> (indices.size ()) !=
3268 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3269 "At the end of this method, for local row " << localRow <<
", "
3270 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3271 << rowInfo.numEntries << suffix);
3272 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3273 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3274 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3275 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3276 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3277 expectedNumEntries << suffix);
3278 #endif // HAVE_TPETRA_DEBUG
// getGlobalRowView: sets `indices` and `values` to host views of the global
// column indices and values of the row with global index globalRow, without
// copying.
// Throws std::runtime_error when the matrix is locally indexed.
// When the row is invalid on this process or has no entries, both outputs
// are set to default-constructed (empty) views.
// In HAVE_TPETRA_DEBUG builds, additional consistency checks throw
// std::logic_error when the view sizes disagree with each other or with the
// row entry count.
3282 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3286 global_inds_host_view_type &indices,
3287 values_host_view_type &values)
const
3289 const char tfecfFuncName[] =
"getGlobalRowView: ";
3291 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3292 isLocallyIndexed (), std::runtime_error,
3293 "The matrix is locally indexed, so we cannot return a view of the row "
3294 "with global column indices. Use getGlobalRowCopy() instead.");
3299 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3300 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3301 rowInfo.numEntries > 0) {
3302 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3305 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3310 indices = global_inds_host_view_type();
3311 values = values_host_view_type();
3314 #ifdef HAVE_TPETRA_DEBUG
3315 const char suffix[] =
". This should never happen. Please report this "
3316 "bug to the Tpetra developers.";
3317 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3318 (static_cast<size_t> (indices.size ()) !=
3319 static_cast<size_t> (values.size ()), std::logic_error,
3320 "At the end of this method, for global row " << globalRow <<
", "
3321 "indices.size() = " << indices.size () <<
" != values.size () = "
3322 << values.size () << suffix);
3323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3324 (static_cast<size_t> (indices.size ()) !=
3325 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3326 "At the end of this method, for global row " << globalRow <<
", "
3327 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3328 << rowInfo.numEntries << suffix);
3329 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3331 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3332 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3333 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3334 " " << expectedNumEntries << suffix);
3335 #endif // HAVE_TPETRA_DEBUG
// Scales all stored matrix values in place by theAlpha (presumably the
// scale method; the name and parameter lines are not visible here).
// The guard below covers the cases where indices are not allocated or the
// process has no rows or no entries (the body of that branch is not visible
// in this excerpt). Otherwise the packed values are scaled on device via
// KokkosBlas::scal, using the same view as input and output.
3339 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3346 const size_t nlrs = staticGraph_->getLocalNumRows ();
3347 const size_t numEntries = staticGraph_->getLocalNumEntries ();
3348 if (! staticGraph_->indicesAreAllocated () ||
3349 nlrs == 0 || numEntries == 0) {
3354 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3355 KokkosBlas::scal(vals, theAlpha, vals);
// Presumably setAllToScalar, judging by the fence label (the name and
// parameter lines, and the statement that writes the values, are not
// visible in this excerpt).
// The guard below covers the cases where indices are not allocated or there
// are no local entries. The routine ends with a labeled Kokkos::fence.
3360 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3371 const size_t numEntries = staticGraph_->getLocalNumEntries();
3372 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3380 Kokkos::fence(
"CrsMatrix::setAllToScalar");
// setAllValues (Kokkos views): installs the complete local structure and
// values of the matrix from device views.
// rowPointers: row offsets; columnIndices: column indices; values: values,
// which must have the same size as columnIndices.
// Throws std::invalid_argument when columnIndices and values differ in
// size, std::runtime_error when myGraph_ is null or when
// myGraph_->setAllIndices throws, and std::logic_error when the resulting
// local graph does not match the input extents.
// Afterwards the unpacked values alias the packed values, the storage
// status is STORAGE_1D_PACKED, and checkInternalState is run.
// (The statement that stores `values` into valuesPacked_wdv is not visible
// in this excerpt.)
3384 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3387 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3388 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3389 const typename local_matrix_device_type::values_type& values)
3392 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3393 const char tfecfFuncName[] =
"setAllValues: ";
3394 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3395 (columnIndices.size () != values.size (), std::invalid_argument,
3396 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3397 " = " << values.size () <<
".");
3398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3399 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
// Graph failures are rethrown as std::runtime_error with added context.
3402 myGraph_->setAllIndices (rowPointers, columnIndices);
3404 catch (std::exception &e) {
3405 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3406 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3407 "exception: " << e.what ());
// Sanity check that the graph really adopted the given structure.
3414 auto lclGraph = myGraph_->getLocalGraphDevice ();
3415 const size_t numEnt = lclGraph.entries.extent (0);
3416 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3417 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3418 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3419 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3420 "local graph. Please report this bug to the Tpetra developers.");
3423 valuesUnpacked_wdv = valuesPacked_wdv;
3427 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3429 checkInternalState ();
// setAllValues overload taking a local KokkosSparse matrix (per the
// profiling label; the name and parameter lines are not visible here).
// Extracts the row map, column entries and values from localDeviceMatrix
// and forwards them to the three-view setAllValues overload.
3432 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3438 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3440 auto graph = localDeviceMatrix.graph;
3443 auto rows = graph.row_map;
3444 auto columns = graph.entries;
3445 auto values = localDeviceMatrix.values;
3447 setAllValues(rows,columns,values);
// setAllValues overload taking Teuchos::ArrayRCP inputs (the name line and
// the first parameter, referred to below as `ptr`, are not visible here).
// ind: column indices; val: values.
// Allocates a native row-offset view of the same length as `ptr`, wraps the
// caller offsets in an unmanaged size_t view, deep-copies ind and val into
// Kokkos views, and forwards to the three-view setAllValues overload.
// (The statement that copies ptrSizeT into ptrNative is not visible in this
// excerpt.)
// Throws std::logic_error when the two offset views differ in extent.
3450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3454 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3455 const Teuchos::ArrayRCP<Scalar>& val)
3457 using Kokkos::Compat::getKokkosViewDeepCopy;
3458 using Teuchos::ArrayRCP;
3459 using Teuchos::av_reinterpret_cast;
3462 typedef typename local_graph_device_type::row_map_type row_map_type;
3464 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3470 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3471 Kokkos::View<
const size_t*,
3472 typename row_map_type::array_layout,
3474 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3478 (ptrNative.extent (0) != ptrSizeT.extent (0),
3479 std::logic_error,
"ptrNative.extent(0) = " <<
3480 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3481 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3482 "Tpetra developers.");
// Values are reinterpreted as IST before the deep copy.
3484 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3485 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3486 this->setAllValues (ptrNative, indIn, valIn);
// getLocalDiagOffsets: fills `offsets` with the per-row diagonal offsets
// computed by the graph (the name and parameter lines are not visible
// here).
// Throws std::runtime_error when the matrix has no graph.
// `offsets` is grown to the local number of rows when it is too small.
// When memory_space is Kokkos::HostSpace the graph writes straight into an
// unmanaged view over `offsets`; otherwise it writes into a temporary
// device view (the copy back to the host view is not visible in this
// excerpt).
3489 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3494 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3495 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3496 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
3503 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
3504 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3505 offsets.resize (lclNumRows);
3511 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3516 Kokkos::MemoryUnmanaged> output_type;
3517 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3518 staticGraph_->getLocalDiagOffsets (offsetsOut);
3521 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3522 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3523 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3524 Kokkos::MemoryUnmanaged> output_type;
3525 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
// getLocalDiagCopy (1-arg): copies the local diagonal of the matrix into
// the Vector `diag` (the name and parameter lines are not visible here).
// Throws std::runtime_error when the matrix has no graph or no column Map.
// The guard on a null row Map or null communicator is visible below; what
// it does is not visible in this excerpt.
// In HAVE_TPETRA_DEBUG builds, also throws std::runtime_error when the Map
// of diag is not compatible with the row Map.
// The final call (its name is not visible here) operates on the first
// column of the local diag view restricted to the local rows, together with
// the local matrix.
3531 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3536 using Teuchos::ArrayRCP;
3537 using Teuchos::ArrayView;
3538 using Teuchos::av_reinterpret_cast;
3539 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3544 staticGraph_.is_null (), std::runtime_error,
3545 "This method requires that the matrix have a graph.");
3546 auto rowMapPtr = this->getRowMap ();
3547 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3553 auto colMapPtr = this->getColMap ();
3554 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3555 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3556 "This method requires that the matrix have a column Map.");
3557 const map_type& rowMap = * rowMapPtr;
3558 const map_type& colMap = * colMapPtr;
3559 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3561 #ifdef HAVE_TPETRA_DEBUG
3564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3565 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3566 "The input Vector's Map must be compatible with the CrsMatrix's row "
3567 "Map. You may check this by using Map's isCompatible method: "
3568 "diag.getMap ()->isCompatible (A.getRowMap ());");
3569 #endif // HAVE_TPETRA_DEBUG
3573 const auto D_lcl_1d =
3574 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3576 const auto lclRowMap = rowMap.getLocalMap ();
3581 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking precomputed diagonal offsets as an
// unmanaged Kokkos::View (the name line and leading parameters are not
// visible here).
// In HAVE_TPETRA_DEBUG builds, throws std::runtime_error when the Map of
// diag is not compatible with the row Map.
// Copies the diagonal with KokkosSparse::getDiagCopy into the first column
// of the local diag view, restricted to the local rows.
3584 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3589 Kokkos::MemoryUnmanaged>& offsets)
const
3591 typedef LocalOrdinal LO;
3593 #ifdef HAVE_TPETRA_DEBUG
3594 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3595 const map_type& rowMap = * (this->getRowMap ());
3598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3599 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3600 "The input Vector's Map must be compatible with (in the sense of Map::"
3601 "isCompatible) the CrsMatrix's row Map.");
3602 #endif // HAVE_TPETRA_DEBUG
3612 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3615 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3617 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3618 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking precomputed diagonal offsets as a
// Teuchos::ArrayView (the name line and the diag parameter are not visible
// here).
// offsets: per-row offset of the diagonal entry within the row, or
// OrdinalTraits<size_t>::invalid() for rows without one.
// In HAVE_TPETRA_DEBUG builds, throws std::runtime_error when the Map of
// diag is not compatible with the row Map.
// Runs a host parallel_for over the local rows: each output entry is first
// zeroed, then overwritten with the packed value at (row start + offset)
// when the offset is valid.
3621 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3625 const Teuchos::ArrayView<const size_t>& offsets)
const
3627 using LO = LocalOrdinal;
3628 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
3631 #ifdef HAVE_TPETRA_DEBUG
3632 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3633 const map_type& rowMap = * (this->getRowMap ());
3636 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3637 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3638 "The input Vector's Map must be compatible with (in the sense of Map::"
3639 "isCompatible) the CrsMatrix's row Map.");
3640 #endif // HAVE_TPETRA_DEBUG
3652 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
// Wrap the caller offsets in an unmanaged host view so the lambda can
// capture it by value.
3654 using host_offsets_view_type =
3655 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3656 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3657 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3659 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3660 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3661 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3663 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3664 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
3665 Kokkos::parallel_for
3666 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3667 range_type (0, myNumRows),
3668 [&, INV, h_offsets] (
const LO lclRow) {
3669 lclVecHost1d(lclRow) = STS::zero ();
3670 if (h_offsets[lclRow] != INV) {
3671 auto curRowOffset = rowPtrsPackedHost (lclRow);
3672 lclVecHost1d(lclRow) =
3673 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
// leftScale: scales the matrix using the entries of the Vector x (the name
// and parameter lines, and the call that performs the scaling, are not
// visible in this excerpt).
// x must live on either the range Map or the row Map of the matrix,
// otherwise std::invalid_argument is thrown.
// When x is on the range Map and the graph has an exporter, x is first
// redistributed into a temporary row-Map vector via doImport with REPLACE;
// otherwise x is used directly.
// The scaling itself is only performed when the matrix is fill complete;
// otherwise std::runtime_error is thrown.
3680 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3685 using ::Tpetra::Details::ProfilingRegion;
3686 using Teuchos::ArrayRCP;
3687 using Teuchos::ArrayView;
3688 using Teuchos::null;
3691 using Teuchos::rcpFromRef;
3693 const char tfecfFuncName[] =
"leftScale: ";
3695 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
// xp ends up pointing at a vector distributed by the row Map.
3697 RCP<const vec_type> xp;
3698 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3701 auto exporter = this->getCrsGraphRef ().getExporter ();
3702 if (exporter.get () !=
nullptr) {
3703 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3704 tempVec->doImport (x, *exporter,
REPLACE);
3708 xp = rcpFromRef (x);
3711 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3712 xp = rcpFromRef (x);
3715 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3716 (
true, std::invalid_argument,
"x's Map must be the same as "
3717 "either the row Map or the range Map of the CrsMatrix.");
3720 if (this->isFillComplete()) {
3721 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3722 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3725 x_lcl_1d,
false,
false);
3729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3730 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
// rightScale: scales the matrix using the entries of the input Vector x.
// x must live on either the domain Map or the column Map of the matrix; the
// matrix must be fill complete.
// NOTE(review): the signature line and the scaling call's leading line are
// not visible in this excerpt.
3735 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3740 using ::Tpetra::Details::ProfilingRegion;
3741 using Teuchos::ArrayRCP;
3742 using Teuchos::ArrayView;
3743 using Teuchos::null;
3746 using Teuchos::rcpFromRef;
3748 const char tfecfFuncName[] =
"rightScale: ";
3750 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
// xp will reference the vector actually used for scaling: either x itself
// (non-owning, via rcpFromRef) or a temporary redistributed onto the column
// Map.
3752 RCP<const vec_type> xp;
3753 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
// x is on the domain Map. If the graph has an Importer, x is moved onto a
// column-Map vector with doImport/REPLACE using that Importer.
3756 auto importer = this->getCrsGraphRef ().getImporter ();
3757 if (importer.get () !=
nullptr) {
3758 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3759 tempVec->doImport (x, *importer,
REPLACE);
3763 xp = rcpFromRef (x);
3766 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
3767 xp = rcpFromRef (x);
// Neither Map matched: unconditional throw. Note that this path throws
// std::runtime_error, whereas leftScale throws std::invalid_argument for the
// analogous mismatch.
3769 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3770 (
true, std::runtime_error,
"x's Map must be the same as "
3771 "either the domain Map or the column Map of the CrsMatrix.");
// Scaling is only performed on a fill-complete matrix, using column 0 of
// xp's read-only device view; otherwise std::runtime_error is thrown below.
3774 if (this->isFillComplete()) {
3775 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3776 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3779 x_lcl_1d,
false,
false);
3783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3784 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
// getNormInf: max-reduces the entries of equilInfo.rowNorms on this process,
// then max-reduces that result over all processes in the communicator.
// NOTE(review): how equilInfo is computed, and the declarations of myMax and
// totalMax, are not visible in this excerpt.
3789 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3794 using range_type = Kokkos::RangePolicy<execution_space, local_ordinal_type>;
// Local stage: Kokkos Max reduction into myMax.
3795 Kokkos::parallel_reduce(
3796 "getNormInf", range_type(0, equilInfo.rowNorms.extent(0)),
3798 max = equilInfo.rowNorms(i);
3800 Kokkos::Max<mag_type>(myMax));
// Global stage: REDUCE_MAX of myMax across the communicator into totalMax.
3802 Teuchos::reduceAll<int, mag_type>(*(getComm()), Teuchos::REDUCE_MAX, myMax,
3803 Teuchos::outArg(totalMax));
// getNorm1: max-reduces the entries of equilInfo.colNorms on this process,
// then max-reduces that result over all processes in the communicator.
// NOTE(review): how equilInfo is computed, and the declarations of myMax and
// totalMax, are not visible in this excerpt.
3807 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Shortcut: when the caller asserts symmetry, return getNormInf() instead.
3811 if (assumeSymmetric)
3812 return getNormInf();
3815 using range_type = Kokkos::RangePolicy<execution_space, local_ordinal_type>;
// Local stage: Kokkos Max reduction into myMax.
3816 Kokkos::parallel_reduce(
3817 "getNorm1", range_type(0, equilInfo.colNorms.extent(0)),
3819 max = equilInfo.colNorms(i);
3821 Kokkos::Max<mag_type>(myMax));
// Global stage: REDUCE_MAX of myMax across the communicator into totalMax.
3823 Teuchos::reduceAll<int, mag_type>(*(getComm()), Teuchos::REDUCE_MAX, myMax,
3824 Teuchos::outArg(totalMax));
// Returns the square root of the global sum of squared magnitudes of the
// matrix's stored values (presumably getFrobeniusNorm; the signature line is
// not visible in this excerpt).
3828 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3833 using Teuchos::ArrayView;
3834 using Teuchos::outArg;
3835 using Teuchos::REDUCE_SUM;
3836 using Teuchos::reduceAll;
// Local accumulation into mySum is skipped when this process has no entries.
3844 if (getLocalNumEntries() > 0) {
3845 if (isStorageOptimized ()) {
// Storage-optimized case: a single flat pass over the packed values on host.
3848 const size_t numEntries = getLocalNumEntries ();
3849 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3850 for (
size_t k = 0; k < numEntries; ++k) {
3851 auto val = values[k];
3855 const mag_type val_abs = STS::abs (val);
3856 mySum += val_abs * val_abs;
// Otherwise: walk row by row, visiting rowInfo.numEntries values per row.
3860 const LocalOrdinal numRows =
3861 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3862 for (LocalOrdinal r = 0; r < numRows; ++r) {
3863 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3864 const size_t numEntries = rowInfo.numEntries;
3865 auto A_r = this->getValuesViewHost(rowInfo);
3866 for (
size_t k = 0; k < numEntries; ++k) {
3868 const mag_type val_abs = STS::abs (val);
3869 mySum += val_abs * val_abs;
// Sum the per-process partial sums over the communicator, then take the root.
3875 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3876 mySum, outArg (totalSum));
3877 return STM::sqrt (totalSum);
// replaceColMap: forwards the new column Map to the matrix's owned graph.
// Throws std::runtime_error when myGraph_ is null (the matrix was built on a
// const graph), because the graph owns the column Map.
3880 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3885 const char tfecfFuncName[] =
"replaceColMap: ";
3889 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3890 myGraph_.is_null (), std::runtime_error,
3891 "This method does not work if the matrix has a const graph. The whole "
3892 "idea of a const graph is that you are not allowed to change it, but "
3893 "this method necessarily must modify the graph, since the graph owns "
3894 "the matrix's column Map.");
3895 myGraph_->replaceColMap (newColMap);
// reindexColumns: works on either a caller-supplied graph or the matrix's own
// graph, and optionally sorts each row afterwards.
// NOTE(review): the first signature line (declaring `graph`) and the call
// that performs the actual reindexing are not visible in this excerpt.
3898 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3902 const Teuchos::RCP<const map_type>& newColMap,
3903 const Teuchos::RCP<const import_type>& newImport,
3904 const bool sortEachRow)
3906 const char tfecfFuncName[] =
"reindexColumns: ";
// A null `graph` is only acceptable when the matrix owns a graph to fall
// back on.
3907 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3908 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3909 "The input graph is null, but the matrix does not own its graph.");
// Use the caller's graph when given, else the matrix's own graph.
3911 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
3912 const bool sortGraph =
false;
// Row-by-row sort on host using each row's local column indices and its
// values view (sort2's remaining arguments are not visible here), done only
// when requested and the graph is locally indexed but not yet sorted.
3916 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3917 const LocalOrdinal lclNumRows =
3918 static_cast<LocalOrdinal
> (theGraph.getLocalNumRows ());
3920 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3922 const RowInfo rowInfo = theGraph.getRowInfo (row);
3923 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
3924 auto vals = this->getValuesViewHostNonConst (rowInfo);
3926 sort2 (lclColInds.data (),
3927 lclColInds.data () + rowInfo.numEntries,
// Record on the graph that its indices are now sorted.
3930 theGraph.indicesAreSorted_ =
true;
// replaceDomainMap: forwards the new domain Map to the matrix's owned graph.
// Throws std::runtime_error when myGraph_ is null (the matrix was built on a
// const graph), because the graph owns the domain Map and Import objects.
3934 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3939 const char tfecfFuncName[] =
"replaceDomainMap: ";
3940 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3941 myGraph_.is_null (), std::runtime_error,
3942 "This method does not work if the matrix has a const graph. The whole "
3943 "idea of a const graph is that you are not allowed to change it, but this"
3944 " method necessarily must modify the graph, since the graph owns the "
3945 "matrix's domain Map and Import objects.");
3946 myGraph_->replaceDomainMap (newDomainMap);
// replaceDomainMapAndImporter: forwards the new domain Map and Import object
// to the matrix's owned graph. Throws std::runtime_error when myGraph_ is
// null (the matrix was built on a const graph).
3949 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3953 Teuchos::RCP<const import_type>& newImporter)
3955 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3957 myGraph_.is_null (), std::runtime_error,
3958 "This method does not work if the matrix has a const graph. The whole "
3959 "idea of a const graph is that you are not allowed to change it, but this"
3960 " method necessarily must modify the graph, since the graph owns the "
3961 "matrix's domain Map and Import objects.");
3962 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
// replaceRangeMap: forwards the new range Map to the matrix's owned graph.
// Throws std::runtime_error when myGraph_ is null (the matrix was built on a
// const graph). The error text names the range Map and Export objects, which
// are what this method's graph call replaces.
3965 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3970 const char tfecfFuncName[] =
"replaceRangeMap: ";
3971 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3972 myGraph_.is_null (), std::runtime_error,
3973 "This method does not work if the matrix has a const graph. The whole "
3974 "idea of a const graph is that you are not allowed to change it, but this"
3975 " method necessarily must modify the graph, since the graph owns the "
3976 "matrix's range Map and Export objects.");
3977 myGraph_->replaceRangeMap (newRangeMap);
// replaceRangeMapAndExporter: forwards the new range Map and Export object to
// the matrix's owned graph. Throws std::runtime_error when myGraph_ is null
// (the matrix was built on a const graph). The error text names the range Map
// and Export objects, which are what this method replaces.
3980 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3984 Teuchos::RCP<const export_type>& newExporter)
3986 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
3987 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3988 myGraph_.is_null (), std::runtime_error,
3989 "This method does not work if the matrix has a const graph. The whole "
3990 "idea of a const graph is that you are not allowed to change it, but this"
3991 " method necessarily must modify the graph, since the graph owns the "
3992 "matrix's range Map and Export objects.");
3993 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
// Appends the given (global column index, value) pairs to the entry of
// nonlocals_ keyed by globalRow.
// NOTE(review): presumably insertNonownedGlobalValues; the signature line
// that declares globalRow is not visible in this excerpt.
3996 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4000 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4001 const Teuchos::ArrayView<const Scalar>& values)
4003 using Teuchos::Array;
4004 typedef GlobalOrdinal GO;
4005 typedef typename Array<GO>::size_type size_type;
// The number of pairs appended is taken from `indices`; `values` is indexed
// over the same range below.
4007 const size_type numToInsert = indices.size ();
// operator[] is used on nonlocals_ to obtain the row's (indices, values)
// pair of arrays.
4010 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4011 Array<GO>& curRowInds = curRow.first;
4012 Array<Scalar>& curRowVals = curRow.second;
// Reserve once up front so the push_back loop does not reallocate repeatedly.
4013 const size_type newCapacity = curRowInds.size () + numToInsert;
4014 curRowInds.reserve (newCapacity);
4015 curRowVals.reserve (newCapacity);
4016 for (size_type k = 0; k < numToInsert; ++k) {
4017 curRowInds.push_back (indices[k]);
4018 curRowVals.push_back (values[k]);
// globalAssemble: moves the entries buffered in nonlocals_ into this matrix
// by building a temporary matrix from them and communicating it with
// Tpetra::ADD, then empties nonlocals_. Requires fill to be active.
// NOTE(review): many lines (including the `if (verbose)` guards and some
// branch conditions) are not visible in this excerpt.
4022 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4029 using Teuchos::Comm;
4030 using Teuchos::outArg;
4033 using Teuchos::REDUCE_MAX;
4034 using Teuchos::REDUCE_MIN;
4035 using Teuchos::reduceAll;
4039 typedef GlobalOrdinal GO;
4040 typedef typename Teuchos::Array<GO>::size_type size_type;
4041 const char tfecfFuncName[] =
"globalAssemble: ";
4042 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
// Verbose diagnostics are written to std::cerr, each message assembled in a
// local ostringstream first.
4044 const bool verbose = Behavior::verbose(
"CrsMatrix");
4045 std::unique_ptr<std::string> prefix;
4047 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4048 std::ostringstream os;
4049 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4051 std::cerr << os.str();
4053 RCP<const Comm<int> > comm = getComm ();
4055 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4056 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4057 "you may call this method.");
4059 const size_t myNumNonlocalRows = nonlocals_.size ();
// Collective check (max over 0/1 flags): does any process have buffered rows?
// The body of the `== 0` branch is not visible here.
4066 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4067 int someoneHasNonlocalRows = 0;
4068 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4069 outArg (someoneHasNonlocalRows));
4070 if (someoneHasNonlocalRows == 0) {
// Build the row Map of the temporary matrix from the buffered row indices.
// For each buffered row: sort column indices together with their values
// (sort2), merge the sorted row (merge2), trim both arrays to the merged
// length, and record the resulting entry count.
4084 RCP<const map_type> nonlocalRowMap;
4085 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4087 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4088 size_type curPos = 0;
4089 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4090 ++mapIter, ++curPos) {
4091 myNonlocalGblRows[curPos] = mapIter->first;
4094 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4095 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4102 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4103 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4104 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4105 merge2 (gblCols_newEnd, vals_newEnd,
4106 gblCols.begin (), gblCols.end (),
4107 vals.begin (), vals.end ());
4108 gblCols.erase (gblCols_newEnd, gblCols.end ());
4109 vals.erase (vals_newEnd, vals.end ());
4110 numEntPerNonlocalRow[curPos] = gblCols.size ();
// The Map's index base is the global minimum buffered row index; a process
// with no buffered rows contributes numeric_limits<GO>::max() to the min.
4121 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4123 auto iter = std::min_element (myNonlocalGblRows.begin (),
4124 myNonlocalGblRows.end ());
4125 if (iter != myNonlocalGblRows.end ()) {
4126 myMinNonlocalGblRow = *iter;
4129 GO gblMinNonlocalGblRow = 0;
4130 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4131 outArg (gblMinNonlocalGblRow));
4132 const GO indexBase = gblMinNonlocalGblRow;
4133 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4134 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4143 std::ostringstream os;
4144 os << *prefix <<
"Create nonlocal matrix" << endl;
4145 std::cerr << os.str();
// Temporary matrix holding exactly the buffered rows, sized per row from the
// counts computed above, then filled from nonlocals_.
4147 RCP<crs_matrix_type> nonlocalMatrix =
4148 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4150 size_type curPos = 0;
4151 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4152 ++mapIter, ++curPos) {
4153 const GO gblRow = mapIter->first;
4155 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4156 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4158 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
// Two communication routes, chosen by whether this matrix's row Map is
// one-to-one: export straight into this matrix, or export into an
// intermediate matrix on a one-to-one Map and then import from it.
4170 auto origRowMap = this->getRowMap ();
4171 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
// isLocallyComplete is cleared to 0 below under conditions that are not
// visible in this excerpt.
4173 int isLocallyComplete = 1;
4175 if (origRowMapIsOneToOne) {
4177 std::ostringstream os;
4178 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4179 std::cerr << os.str();
4181 export_type exportToOrig (nonlocalRowMap, origRowMap);
4183 isLocallyComplete = 0;
4186 std::ostringstream os;
4187 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4188 std::cerr << os.str();
4190 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4195 std::ostringstream os;
4196 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4197 std::cerr << os.str();
4204 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4206 isLocallyComplete = 0;
4214 std::ostringstream os;
4215 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4217 std::cerr << os.str();
4219 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4221 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4227 std::ostringstream os;
4228 os << *prefix <<
"Free nonlocalMatrix" << endl;
4229 std::cerr << os.str();
// Drop this handle to the temporary matrix before the import step.
4231 nonlocalMatrix = Teuchos::null;
4235 std::ostringstream os;
4236 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4237 std::cerr << os.str();
4239 import_type importToOrig (oneToOneRowMap, origRowMap);
4240 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4248 std::ostringstream os;
4249 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4250 std::cerr << os.str();
// Swap with a default-constructed container so nonlocals_ ends up empty.
4252 decltype (nonlocals_) newNonlocals;
4253 std::swap (nonlocals_, newNonlocals);
// Collective error check: the min over all processes of isLocallyComplete
// must be 1, otherwise std::runtime_error is thrown.
4262 int isGloballyComplete = 0;
4263 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4264 outArg (isGloballyComplete));
4265 TEUCHOS_TEST_FOR_EXCEPTION
4266 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4267 "you called insertGlobalValues with a global row index which is not in "
4268 "the matrix's row Map on any process in its communicator.");
// Puts the matrix back into a not-fill-complete state (presumably resumeFill;
// the signature line is not visible in this excerpt).
4271 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Only a graph owned by the matrix is told to resume fill.
4276 if (! isStaticGraph ()) {
4277 myGraph_->resumeFill (params);
// Reset applyHelper; localApply (re)creates it when it finds it unset.
4280 applyHelper.reset();
4281 fillComplete_ =
false;
// Forwards to the graph's haveGlobalConstants() (presumably the matrix's
// method of the same name; the signature line is not visible in this
// excerpt).
4284 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4288 return getCrsGraphRef ().haveGlobalConstants ();
// fillComplete(params): convenience overload that uses the graph's row Map
// as both the domain Map and the range Map, then delegates to the three-
// argument fillComplete.
// NOTE(review): the line defining `graph` is not visible in this excerpt.
4291 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4296 const char tfecfFuncName[] =
"fillComplete(params): ";
// A null graph here is treated as an internal bug (std::logic_error).
4298 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4299 (this->getCrsGraph ().is_null (), std::logic_error,
4300 "getCrsGraph() returns null. This should not happen at this point. "
4301 "Please report this bug to the Tpetra developers.");
4311 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4312 Teuchos::RCP<const map_type> domainMap = rangeMap;
4313 this->fillComplete (domainMap, rangeMap, params);
// fillComplete(domainMap, rangeMap, params): finishes matrix assembly and
// marks the matrix fill complete. Requires fill to be active and the matrix
// not already fill complete.
// Parameters read from `params` (when non-null): "No Nonlocal Changes",
// "sort column map ghost gids" / "Sort column Map ghost GIDs", and
// "compute global constants".
// NOTE(review): the first signature line and several branch/else lines are
// not visible in this excerpt.
4317 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4321 const Teuchos::RCP<const map_type>& rangeMap,
4322 const Teuchos::RCP<Teuchos::ParameterList>& params)
4326 using Teuchos::ArrayRCP;
4330 const char tfecfFuncName[] =
"fillComplete: ";
4331 ProfilingRegion regionFillComplete
4332 (
"Tpetra::CrsMatrix::fillComplete");
4333 const bool verbose = Behavior::verbose(
"CrsMatrix");
4334 std::unique_ptr<std::string> prefix;
4336 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4337 std::ostringstream os;
4338 os << *prefix << endl;
4339 std::cerr << os.str ();
4342 "Tpetra::CrsMatrix::fillCompete",
4345 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4346 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4347 "Matrix fill state must be active (isFillActive() "
4348 "must be true) before you may call fillComplete().");
4349 const int numProcs = this->getComm ()->getSize ();
// Defaults, possibly overridden from `params` below.
4359 bool assertNoNonlocalInserts =
false;
4362 bool sortGhosts =
true;
4364 if (! params.is_null ()) {
4365 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4366 assertNoNonlocalInserts);
// Two spellings of the ghost-sorting option are accepted; the lowercase one
// is checked first.
4367 if (params->isParameter (
"sort column map ghost gids")) {
4368 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4370 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4371 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
// globalAssemble is skipped on a single process or when the caller asserted
// that there were no nonlocal changes.
4376 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4378 if (! this->myGraph_.is_null ()) {
4379 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
// If the graph's indices were never allocated, allocate values now: with
// local indices when a column Map exists, otherwise with global indices.
4382 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4383 if (this->hasColMap ()) {
4384 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4387 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4392 if (needGlobalAssemble) {
4393 this->globalAssemble ();
// Buffered nonlocal entries on a one-process run are reported as an error.
4396 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4397 (numProcs == 1 && nonlocals_.size() > 0,
4398 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4399 "An invalid entry (i.e., with row index not in the row Map) must have "
4400 "been submitted to the CrsMatrix.");
// Static (const) graph path: the graph cannot be modified, so (in debug
// builds) the supplied Maps must match the graph's, and only the local
// matrix is filled.
4403 if (this->isStaticGraph ()) {
4411 #ifdef HAVE_TPETRA_DEBUG
4429 const bool domainMapsMatch =
4430 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4431 const bool rangeMapsMatch =
4432 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4434 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4435 (! domainMapsMatch, std::runtime_error,
4436 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4437 "The graph cannot be changed because it was given to the CrsMatrix "
4438 "constructor as const. You can fix this by passing in the graph's "
4439 "domain Map and range Map to the matrix's fillComplete call.");
4441 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4442 (! rangeMapsMatch, std::runtime_error,
4443 "The CrsMatrix's range Map does not match the graph's range Map. "
4444 "The graph cannot be changed because it was given to the CrsMatrix "
4445 "constructor as const. You can fix this by passing in the graph's "
4446 "domain Map and range Map to the matrix's fillComplete call.");
4447 #endif // HAVE_TPETRA_DEBUG
4451 this->fillLocalMatrix (params);
// Owned-graph path (presumably the else branch; the `else` line is not
// visible): set the Maps, build the column Map if absent, convert indices to
// local, sort/merge, build Import/Export, fill local graph and matrix, then
// finish the graph's own bookkeeping.
4459 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4462 Teuchos::Array<int> remotePIDs (0);
4463 const bool mustBuildColMap = ! this->hasColMap ();
4464 if (mustBuildColMap) {
4465 this->myGraph_->makeColMap (remotePIDs);
// makeIndicesLocal returns (error count, message); a nonzero count throws
// with that message.
4470 const std::pair<size_t, std::string> makeIndicesLocalResult =
4471 this->myGraph_->makeIndicesLocal(verbose);
4476 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4477 (makeIndicesLocalResult.first != 0, std::runtime_error,
4478 makeIndicesLocalResult.second);
4480 const bool sorted = this->myGraph_->isSorted ();
4481 const bool merged = this->myGraph_->isMerged ();
4482 this->sortAndMergeIndicesAndValues (sorted, merged);
4487 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4491 this->fillLocalGraphAndMatrix (params);
// Global constants are computed unless `params` sets
// "compute global constants" to false.
4493 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4494 params->get (
"compute global constants",
true);
4495 if (callGraphComputeGlobalConstants) {
4496 this->myGraph_->computeGlobalConstants ();
4499 this->myGraph_->computeLocalConstants ();
4501 this->myGraph_->fillComplete_ =
true;
4502 this->myGraph_->checkInternalState ();
4507 this->fillComplete_ =
true;
4510 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4512 this->checkInternalState ();
// expertStaticFillComplete: fill-completes the matrix using caller-supplied
// domain/range Maps and Import/Export objects, which are forwarded to the
// owned graph's expertStaticFillComplete. Requires fill to be active, the
// matrix not already fill complete, and a non-null myGraph_.
// When HAVE_TPETRA_MMM_TIMINGS is defined, timers are created whose names
// are prefixed with "Tpetra " + the "Timer Label" parameter + ": ".
// NOTE(review): the first signature line and the #endif lines of the timing
// sections are not visible in this excerpt.
4516 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4520 const Teuchos::RCP<const map_type> & rangeMap,
4521 const Teuchos::RCP<const import_type>& importer,
4522 const Teuchos::RCP<const export_type>& exporter,
4523 const Teuchos::RCP<Teuchos::ParameterList> &params)
4525 #ifdef HAVE_TPETRA_MMM_TIMINGS
4527 if(!params.is_null())
4528 label = params->get(
"Timer Label",label);
4529 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4530 using Teuchos::TimeMonitor;
4532 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4535 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4536 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4537 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4538 "must be true) before calling fillComplete().");
4539 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4540 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4543 #ifdef HAVE_TPETRA_MMM_TIMINGS
4544 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
// Step 1: let the graph fill-complete itself with the supplied objects.
4547 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4551 #ifdef HAVE_TPETRA_MMM_TIMINGS
4552 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
// Step 2: build the local graph and matrix, then mark the matrix complete.
4555 fillLocalGraphAndMatrix (params);
4560 fillComplete_ =
true;
// Debug builds only: verify the resulting fill state. Each message states
// the condition that its own check actually tests.
4563 #ifdef HAVE_TPETRA_DEBUG
4564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4565 ": We're at the end of fillComplete(), but isFillActive() is true. "
4566 "Please report this bug to the Tpetra developers.");
4567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4568 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4569 "Please report this bug to the Tpetra developers.");
4570 #endif // HAVE_TPETRA_DEBUG
4572 #ifdef HAVE_TPETRA_MMM_TIMINGS
4573 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4576 checkInternalState();
// Scans one row's column indices [cols, cols + rowLen) with a read cursor
// (`cur`) and a write cursor (`newend`), and returns `newend - beg`.
// NOTE(review): presumably mergeRowIndicesAndValues (it is called by that
// name from sortAndMergeIndicesAndValues with matching arguments); the
// signature and the loop body that advances the cursors and handles the
// values are not visible in this excerpt.
4580 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4586 LocalOrdinal* beg = cols;
4587 LocalOrdinal* end = cols + rowLen;
4588 LocalOrdinal* newend = beg;
4590 LocalOrdinal* cur = beg + 1;
// Compare the entry under the read cursor with the entry under the write
// cursor; the branch taken when they differ is not visible here.
4594 while (cur != end) {
4595 if (*cur != *newend) {
4612 return newend - beg;
// sortAndMergeIndicesAndValues: when the rows are not already both sorted and
// merged, sorts each local row's column indices together with its values and
// merges the row, updating each row's length and counting how many entries
// were removed.
// Preconditions enforced below: the matrix owns its graph (not static),
// myGraph_ is non-null, and storage has not been optimized.
// NOTE(review): the signature line and the definitions of rowBegins_,
// rowLengths_, cols_ and vals are not visible in this excerpt.
4615 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4620 using ::Tpetra::Details::ProfilingRegion;
4621 typedef LocalOrdinal LO;
4622 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4623 host_execution_space;
4624 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4625 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4626 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
// Nothing to do when the caller reports rows as already sorted and merged.
4628 if (! sorted || ! merged) {
4629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4630 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4631 "\"static\" (const) graph, since the matrix does not own the graph.");
4632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4633 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4634 "this matrix claims ! isStaticGraph(). "
4635 "Please report this bug to the Tpetra developers.");
4636 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4637 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4638 "this method if the graph's storage has already been optimized. "
4639 "Please report this bug to the Tpetra developers.");
4642 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4643 size_t totalNumDups = 0;
// The unpacked values are accessed on host for in-place modification.
4648 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
// Host reduction over local rows. Per row: sort, merge, store the new row
// length, and add the number of removed entries to the reduction.
4650 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4651 [=] (
const LO lclRow,
size_t& numDups) {
4652 size_t rowBegin = rowBegins_(lclRow);
4653 size_t rowLen = rowLengths_(lclRow);
4654 LO* cols = cols_.data() + rowBegin;
4657 sort2 (cols, cols + rowLen, vals);
4660 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4661 rowLengths_(lclRow) = newRowLength;
4662 numDups += rowLen - newRowLength;
// Non-transposed apply path: brings X onto the column Map when an Importer
// exists, runs localApply with Teuchos::NO_TRANS, and routes the result
// through a row-Map temporary when an Exporter exists.
// NOTE(review): presumably applyNonTranspose (apply() calls that name for
// NO_TRANS); the signature and many branch lines are not visible here, so
// the branch-level comments below are hedged.
4675 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4686 using Teuchos::rcp_const_cast;
4687 using Teuchos::rcpFromRef;
4688 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4689 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
// Special-cased coefficients: alpha == ZERO, and within that branch
// beta != ONE (the bodies are not visible in this excerpt).
4695 if (alpha == ZERO) {
4698 }
else if (beta != ONE) {
4712 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4713 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// beta == ZERO means Y's prior contents do not contribute to the result.
4719 const bool Y_is_overwritten = (beta ==
ZERO);
// Y counts as "replicated" when it is not distributed although the
// communicator has more than one process.
4722 const bool Y_is_replicated =
4723 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4731 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X_colMap: the input as seen on the column Map. Without an Importer it is
// either X_in itself or a column-Map temporary (the condition choosing
// between them is not visible); with an Importer, X_in is imported (INSERT)
// into a column-Map temporary.
4738 RCP<const MV> X_colMap;
4739 if (importer.is_null ()) {
4747 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4749 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4754 X_colMap = rcpFromRef (X_in);
4758 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4764 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4767 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4768 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4775 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
// With an Exporter: apply into the row-Map temporary with beta = ZERO; the
// export of that temporary into Y_in is not visible in this excerpt.
4782 if (! exporter.is_null ()) {
4783 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4785 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4791 if (Y_is_overwritten) {
// Without an Exporter: apply either into a row-Map temporary or directly
// into Y_in, passing the caller's beta (the deciding condition is not
// visible here).
4817 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4824 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4828 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4836 if (Y_is_replicated) {
4837 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
// Transposed apply path (mode is TRANS or CONJ_TRANS): uses the cached
// importMV_/exportMV_ multivectors as communication buffers around
// localApply.
// NOTE(review): presumably applyTranspose (apply() calls that name for
// non-NO_TRANS modes); the signature and many branch lines are not visible
// here, so the branch-level comments below are hedged.
4842 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4847 const Teuchos::ETransp mode,
4852 using Teuchos::null;
4855 using Teuchos::rcp_const_cast;
4856 using Teuchos::rcpFromRef;
4857 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
// Special-cased coefficients: alpha == ZERO, then beta == ZERO (the bodies
// are not visible in this excerpt).
4860 if (alpha == ZERO) {
4873 else if (beta == ZERO) {
4891 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4892 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// Y counts as "replicated" when it is not distributed although the
// communicator has more than one process.
4897 const bool Y_is_replicated = (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4898 const bool Y_is_overwritten = (beta ==
ZERO);
4899 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
// X is either a deep copy of X_in or a non-owning reference to it (the
// condition choosing between them is not visible here).
4905 X = rcp (
new MV (X_in, Teuchos::Copy));
4907 X = rcpFromRef (X_in);
// (Re)create the cached buffers when they are missing; a cached buffer whose
// column count differs from numVectors is detected first (the action taken
// in that case is not visible here).
4911 if (importer != Teuchos::null) {
4912 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4915 if (importMV_ == null) {
4916 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
4919 if (exporter != Teuchos::null) {
4920 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4923 if (exportMV_ == null) {
4924 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
// In the transposed direction the Exporter is used for the incoming
// communication: X_in is brought into exportMV_ with doImport/INSERT.
4930 if (! exporter.is_null ()) {
4931 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4932 exportMV_->doImport (X_in, *exporter,
INSERT);
// With an Importer: zero importMV_, apply into it with beta = ZERO; the
// subsequent communication into Y_in is not visible in this excerpt.
4939 if (importer != Teuchos::null) {
4940 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
4947 importMV_->putScalar (ZERO);
4949 this->localApply (*X, *importMV_, mode, alpha, ZERO);
4951 if (Y_is_overwritten) {
// Without an Importer: apply either into a deep copy of Y_in or directly
// into Y_in (the deciding condition is not visible here).
4968 MV Y (Y_in, Teuchos::Copy);
4969 this->localApply (*X, Y, mode, alpha, beta);
4972 this->localApply (*X, Y_in, mode, alpha, beta);
4979 if (Y_is_replicated) {
4980 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
// localApply: process-local sparse matrix times multivector, for
// NO_TRANS, TRANS or CONJ_TRANS `mode`, with scalars alpha and beta.
// Checks performed (each throws std::runtime_error):
//   - local row counts of X and Y against the column/row Maps (NO_TRANS) or
//     the row/column Maps (TRANS, CONJ_TRANS);
//   - the matrix is fill complete;
//   - X and Y are constant stride;
//   - X and Y do not alias (same non-null data pointer with nonzero extent).
// An invalid `mode` throws std::invalid_argument.
// NOTE(review): the first signature line, the check conditions' leading
// lines and the spmv calls are not visible in this excerpt.
4985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4990 const Teuchos::ETransp mode,
4991 const Scalar& alpha,
4992 const Scalar& beta)
const
4995 using Teuchos::NO_TRANS;
4996 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5003 const char tfecfFuncName[] =
"localApply: ";
5004 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5008 const bool transpose = (mode != Teuchos::NO_TRANS);
5009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5011 getColMap ()->getLocalNumElements (), std::runtime_error,
5012 "NO_TRANS case: X has the wrong number of local rows. "
5014 "getColMap()->getLocalNumElements() = " <<
5015 getColMap ()->getLocalNumElements () <<
".");
5016 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5018 getRowMap ()->getLocalNumElements (), std::runtime_error,
5019 "NO_TRANS case: Y has the wrong number of local rows. "
5021 "getRowMap()->getLocalNumElements() = " <<
5022 getRowMap ()->getLocalNumElements () <<
".");
5023 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5025 getRowMap ()->getLocalNumElements (), std::runtime_error,
5026 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5028 <<
" != getRowMap()->getLocalNumElements() = "
5029 << getRowMap ()->getLocalNumElements () <<
".");
// Transposed output check: this one compares against the column Map, i.e. it
// is the Y counterpart of the X check above, so the message names Y.
5030 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5032 getColMap ()->getLocalNumElements (), std::runtime_error,
5033 "TRANS or CONJ_TRANS case: Y has the wrong number of local "
5035 <<
" != getColMap()->getLocalNumElements() = "
5036 << getColMap ()->getLocalNumElements () <<
".");
5037 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5038 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5039 "fill complete. You must call fillComplete() (possibly with "
5040 "domain and range Map arguments) without an intervening "
5041 "resumeFill() call before you may call this method.");
5042 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5044 std::runtime_error,
"X and Y must be constant stride.");
5049 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5050 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5051 && X_lcl.extent(0) != 0,
5052 std::runtime_error,
"X and Y may not alias one another.");
5055 auto A_lcl = getLocalMatrixDevice();
// Lazily create the apply helper on first use (it is reset elsewhere when
// fill is resumed).
5057 if(!applyHelper.get()) {
5060 bool useMergePath =
false;
// Only when cuSPARSE is enabled and the execution space is Cuda: a row
// imbalance figure (max entries per row minus the average) is computed and
// may switch the algorithm to merge path; the threshold test is not visible
// in this excerpt.
5061 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
5067 if constexpr(std::is_same_v<execution_space, Kokkos::Cuda>) {
5068 LocalOrdinal nrows = getLocalNumRows();
5069 LocalOrdinal maxRowImbalance = 0;
5071 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5074 useMergePath =
true;
5077 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
5078 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
// Translate the Teuchos transpose enum to the KokkosSparse mode string.
5082 const char* modeKK =
nullptr;
5085 case Teuchos::NO_TRANS:
5086 modeKK = KokkosSparse::NoTranspose;
break;
5087 case Teuchos::TRANS:
5088 modeKK = KokkosSparse::Transpose;
break;
5089 case Teuchos::CONJ_TRANS:
5090 modeKK = KokkosSparse::ConjugateTranspose;
break;
5092 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
// Two call variants: one using a matrix with int row pointers and
// handle_int, the other using A_lcl's own row pointers and handle.
5095 if(applyHelper->shouldUseIntRowptrs())
5097 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5099 &applyHelper->handle_int, modeKK,
5105 &applyHelper->handle, modeKK,
// apply: public entry point that dispatches on `mode`. NO_TRANS goes to
// applyNonTranspose; every other mode goes to applyTranspose. Throws
// std::runtime_error if the matrix is not fill complete.
// NOTE(review): the first signature line is not visible in this excerpt.
5110 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5115 Teuchos::ETransp mode,
5120 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5122 TEUCHOS_TEST_FOR_EXCEPTION
5123 (! isFillComplete (), std::runtime_error,
5124 fnName <<
": Cannot call apply() until fillComplete() "
5125 "has been called.");
// Each branch opens its own ProfilingRegion so the two paths are reported
// under different labels.
5127 if (mode == Teuchos::NO_TRANS) {
5128 ProfilingRegion regionNonTranspose (fnName);
5129 this->applyNonTranspose (X, Y, alpha, beta);
5132 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5133 this->applyTranspose (X, Y, mode, alpha, beta);
// convert: returns a new CrsMatrix with value type T built on this matrix's
// graph, with this matrix's values converted into it. The source matrix must
// be fill complete (std::runtime_error otherwise).
// NOTE(review): the `template <class T>` line and the return statement are
// not visible in this excerpt.
5138 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5140 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5146 const char tfecfFuncName[] =
"convert: ";
5148 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5149 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5150 "of the conversion) is not fill complete. You must first call "
5151 "fillComplete() (possibly with the domain and range Map) without an "
5152 "intervening call to resumeFill(), before you may call this method.");
// The new matrix is constructed from this matrix's graph.
5154 RCP<output_matrix_type> newMatrix
5155 (
new output_matrix_type (this->getCrsGraph ()));
// Copy the device values into the new matrix via copyConvert.
5159 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5160 this->getLocalMatrixDevice ().values);
// Fill-complete the result with the same domain and range Maps as the source.
5164 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
// CrsMatrix::checkInternalState (name taken from tfecfFuncName below).
//
// Sanity checks on the matrix/graph invariants. Every violated check throws
// std::logic_error with the generic "please report" text in `err`.
5170 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5177 const char tfecfFuncName[] =
"checkInternalState: ";
5178 const char err[] =
"Internal state is not consistent. "
5179 "Please report this bug to the Tpetra developers.";
// Invariant: the matrix always has a (static) graph.
5183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5184 (staticGraph_.is_null (), std::logic_error, err);
// Invariant: when myGraph_ is set, it must be the same object as staticGraph_.
5188 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5189 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5190 std::logic_error, err);
// Invariant: a fill-complete matrix implies a fill-complete graph.
5192 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5193 (isFillComplete () && ! staticGraph_->isFillComplete (),
5194 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5195 "but its graph is NOT fill complete.");
// Invariant: if the graph has allocated indices, a nonzero allocation size and
// at least one local row, the unpacked values storage must not be empty.
5198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5199 (staticGraph_->indicesAreAllocated () &&
5200 staticGraph_->getLocalAllocationSize() > 0 &&
5201 staticGraph_->getLocalNumRows() > 0 &&
5202 valuesUnpacked_wdv.extent (0) == 0,
5203 std::logic_error, err);
// One-line, brace-delimited summary of the matrix, assembled in a string
// stream (presumably CrsMatrix::description; the signature line is not visible
// in this excerpt).
//
// Contents: the optional object label, the fill-complete flag and the global
// dimensions; the global entry count is added only when fill complete.
5207 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5212 std::ostringstream os;
5214 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
// The label is printed only when it is non-empty.
5215 if (this->getObjectLabel () !=
"") {
5216 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
// Fill-complete matrices also report the global number of entries.
5218 if (isFillComplete ()) {
5219 os <<
"isFillComplete: true"
5220 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5221 << getGlobalNumCols () <<
"]"
5222 <<
", global number of entries: " << getGlobalNumEntries ()
// Not fill complete: only the dimensions are reported, and the brace is closed
// here.
5226 os <<
"isFillComplete: false"
5227 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5228 << getGlobalNumCols () <<
"]}";
// Multi-line description of the matrix written to `out`, with the amount of
// detail controlled by `verbLevel` (presumably CrsMatrix::describe; only part
// of the signature is visible in this excerpt).
//
// Detail levels as used below:
//   VERB_DEFAULT  is treated as VERB_LOW.
//   VERB_NONE     is tested first (the action taken is on a line not shown).
//   < VERB_MEDIUM header, template parameters and global dimensions.
//   >= VERB_MEDIUM also the row/column/domain/range Maps and per-process
//                 entry counts.
//   >= VERB_HIGH  also a per-row table (rank, global row, number of entries).
//   VERB_EXTREME  also the (index, value) pairs of every row.
5233 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5237 const Teuchos::EVerbosityLevel verbLevel)
const
5241 using Teuchos::ArrayView;
5242 using Teuchos::Comm;
5244 using Teuchos::TypeNameTraits;
5245 using Teuchos::VERB_DEFAULT;
5246 using Teuchos::VERB_NONE;
5247 using Teuchos::VERB_LOW;
5248 using Teuchos::VERB_MEDIUM;
5249 using Teuchos::VERB_HIGH;
5250 using Teuchos::VERB_EXTREME;
// Map the default verbosity onto VERB_LOW.
5252 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5254 if (vl == VERB_NONE) {
5259 Teuchos::OSTab tab0 (out);
5261 RCP<const Comm<int> > comm = this->getComm();
5262 const int myRank = comm->getRank();
5263 const int numProcs = comm->getSize();
// Column width for the per-row table: the loop runs once per power of ten
// below the global row count (its body is not visible here); the result is
// then clamped to at least 11 and widened by 2.
5265 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5268 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
// Header block: class name, optional label, template parameters.
5278 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5280 Teuchos::OSTab tab1 (out);
5283 if (this->getObjectLabel () !=
"") {
5284 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5287 out <<
"Template parameters:" << endl;
5288 Teuchos::OSTab tab2 (out);
5289 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5290 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5291 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5292 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
// Global summary; entry counts are printed only when fill complete.
5294 if (isFillComplete()) {
5295 out <<
"isFillComplete: true" << endl
5296 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5297 << getGlobalNumCols () <<
"]" << endl
5298 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5299 << endl <<
"Global max number of entries in a row: "
5300 << getGlobalMaxNumRowEntries () << endl;
5303 out <<
"isFillComplete: false" << endl
5304 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5305 << getGlobalNumCols () <<
"]" << endl;
// Cut-off for levels below VERB_MEDIUM (the action taken is on a line not
// shown; presumably an early return).
5309 if (vl < VERB_MEDIUM) {
// Row Map: either "null" or the Map's own describe() output.
5315 out << endl <<
"Row Map:" << endl;
5317 if (getRowMap ().is_null ()) {
5319 out <<
"null" << endl;
5326 getRowMap ()->describe (out, vl);
// Column Map: abbreviated when null or when it is the same RCP as the row Map.
5331 out <<
"Column Map: ";
5333 if (getColMap ().is_null ()) {
5335 out <<
"null" << endl;
5337 }
else if (getColMap () == getRowMap ()) {
5339 out <<
"same as row Map" << endl;
5345 getColMap ()->describe (out, vl);
// Domain Map: abbreviated when null or identical to the row or column Map.
5350 out <<
"Domain Map: ";
5352 if (getDomainMap ().is_null ()) {
5354 out <<
"null" << endl;
5356 }
else if (getDomainMap () == getRowMap ()) {
5358 out <<
"same as row Map" << endl;
5360 }
else if (getDomainMap () == getColMap ()) {
5362 out <<
"same as column Map" << endl;
5368 getDomainMap ()->describe (out, vl);
// Range Map: abbreviated when null or identical to the domain or row Map.
5373 out <<
"Range Map: ";
5375 if (getRangeMap ().is_null ()) {
5377 out <<
"null" << endl;
5379 }
else if (getRangeMap () == getDomainMap ()) {
5381 out <<
"same as domain Map" << endl;
5383 }
else if (getRangeMap () == getRowMap ()) {
5385 out <<
"same as row Map" << endl;
5391 getRangeMap ()->describe (out, vl);
// Per-process statistics: each rank prints only on its own loop iteration.
5395 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5396 if (myRank == curRank) {
5397 out <<
"Process rank: " << curRank << endl;
5398 Teuchos::OSTab tab2 (out);
5399 if (! staticGraph_->indicesAreAllocated ()) {
5400 out <<
"Graph indices not allocated" << endl;
5403 out <<
"Number of allocated entries: "
5404 << staticGraph_->getLocalAllocationSize () << endl;
5406 out <<
"Number of entries: " << getLocalNumEntries () << endl
5407 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
// Cut-off for levels below VERB_HIGH (the action taken is on a line not shown;
// presumably an early return).
5416 if (vl < VERB_HIGH) {
// Per-row table, again printed one rank at a time.
5421 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5422 if (myRank == curRank) {
5423 out << std::setw(width) <<
"Proc Rank"
5424 << std::setw(width) <<
"Global Row"
5425 << std::setw(width) <<
"Num Entries";
5426 if (vl == VERB_EXTREME) {
5427 out << std::setw(width) <<
"(Index,Value)";
5430 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5431 const size_t nE = getNumEntriesInLocalRow(r);
5432 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5433 out << std::setw(width) << myRank
5434 << std::setw(width) << gid
5435 << std::setw(width) << nE;
// At VERB_EXTREME the row's entries are printed as (global column, value).
5436 if (vl == VERB_EXTREME) {
// Globally indexed: the row view already holds global column indices.
5437 if (isGloballyIndexed()) {
5438 global_inds_host_view_type rowinds;
5439 values_host_view_type rowvals;
5440 getGlobalRowView (gid, rowinds, rowvals);
5441 for (
size_t j = 0; j < nE; ++j) {
5442 out <<
" (" << rowinds[j]
5443 <<
", " << rowvals[j]
// Locally indexed: local column indices are translated to global ones through
// the column Map before printing.
5447 else if (isLocallyIndexed()) {
5448 local_inds_host_view_type rowinds;
5449 values_host_view_type rowvals;
5450 getLocalRowView (r, rowinds, rowvals);
5451 for (
size_t j=0; j < nE; ++j) {
5452 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5453 <<
", " << rowvals[j]
// NOTE(review): the signature and most of the body of this member are not
// visible in this excerpt; presumably this is CrsMatrix::checkSizes -- confirm.
// The one visible statement returns true exactly when `srcRowMat` is non-null.
// `srcRowMat` is set on a line not shown; presumably it is the result of
// casting the source object to a row matrix.
5469 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5483 return (srcRowMat !=
nullptr);
// CrsMatrix::applyCrsPadding (name taken from tfecfFuncName below).
//
// Pads the unpacked column-index and values storage according to `padding`.
// Steps visible in this excerpt:
//   1. allocate values if the graph's indices are not yet allocated;
//   2. build per-row begin/end offset arrays on device;
//   3. hand them, with the index and values storage, to a padding routine
//      (the callee's name is on a line not shown);
//   4. check that values and column indices ended up the same length;
//   5. refresh the per-row entry counts and install the new row offsets.
//
// When `verbose` is set, progress messages are written to std::cerr.
// Throws std::logic_error if values and indices differ in length after
// padding.
5486 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5490 const typename crs_graph_type::padding_type& padding,
5496 using LO = local_ordinal_type;
5497 using row_ptrs_type =
5498 typename local_graph_device_type::row_map_type::non_const_type;
5499 using range_policy =
5500 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5501 const char tfecfFuncName[] =
"applyCrsPadding";
5502 const char suffix[] =
5503 ". Please report this bug to the Tpetra developers.";
5504 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
// Prefix string for verbose output; the messages below dereference it.
5506 std::unique_ptr<std::string> prefix;
5508 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5509 std::ostringstream os;
5510 os << *prefix <<
"padding: ";
5513 std::cerr << os.str();
// The rank is only looked up in verbose mode; otherwise it is -1. The lambda
// checks for a null Map and a null communicator before calling getRank() (the
// values returned in those cases are on lines not shown).
5515 const int myRank = ! verbose ? -1 : [&] () {
5516 auto map = this->getMap();
5517 if (map.is_null()) {
5520 auto comm = map->getComm();
5521 if (comm.is_null()) {
5524 return comm->getRank();
// Allocate values first if the graph has no allocated indices yet.
5528 if (! myGraph_->indicesAreAllocated()) {
5530 std::ostringstream os;
5531 os << *prefix <<
"Call allocateIndices" << endl;
5532 std::cerr << os.str();
5534 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5546 std::ostringstream os;
5547 os << *prefix <<
"Allocate row_ptrs_beg: "
5548 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5549 std::cerr << os.str();
// row_ptr_beg: device copy of the graph's unpacked row offsets. The
// allocation is uninitialized because the deep_copy below overwrites all of it.
5551 using Kokkos::view_alloc;
5552 using Kokkos::WithoutInitializing;
5553 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5554 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5556 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
// N = number of rows = (number of offsets - 1), or 0 when there are no offsets.
5558 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5559 size_t(row_ptr_beg.extent(0) - 1);
5561 std::ostringstream os;
5562 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5563 std::cerr << os.str();
// row_ptr_end: one end offset per row, filled by one of the kernels below.
5565 row_ptrs_type row_ptr_end(
5566 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5568 row_ptrs_type num_row_entries_d;
// Per-row entry counts are used, and later refreshed, only when the graph
// stores them (k_numRowEntries_ is non-empty).
5570 const bool refill_num_row_entries =
5571 myGraph_->k_numRowEntries_.extent(0) != 0;
5573 if (refill_num_row_entries) {
// Rows may be partially filled: end = begin + stored entry count.
5576 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5577 myGraph_->k_numRowEntries_);
5578 Kokkos::parallel_for
5579 (
"Fill end row pointers", range_policy(0, N),
5580 KOKKOS_LAMBDA (
const size_t i) {
5581 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
// No stored counts: each row ends where the next one begins.
5588 Kokkos::parallel_for
5589 (
"Fill end row pointers", range_policy(0, N),
5590 KOKKOS_LAMBDA (
const size_t i) {
5591 row_ptr_end(i) = row_ptr_beg(i+1);
// Pad global or local column indices, depending on how the graph is indexed,
// together with the values.
5595 if (myGraph_->isGloballyIndexed()) {
5597 myGraph_->gblInds_wdv,
5598 valuesUnpacked_wdv, padding, myRank, verbose);
// After padding, values and global indices must have equal length.
5599 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5600 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5601 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5602 (newValuesLen != newColIndsLen, std::logic_error,
5603 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5604 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5609 myGraph_->lclIndsUnpacked_wdv,
5610 valuesUnpacked_wdv, padding, myRank, verbose);
// Same length check for the locally indexed case.
5611 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5612 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5613 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5614 (newValuesLen != newColIndsLen, std::logic_error,
5615 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5616 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
// Recompute the per-row entry counts as end - begin, using the offsets as
// they stand after the padding call above.
5620 if (refill_num_row_entries) {
5621 Kokkos::parallel_for
5622 (
"Fill num entries", range_policy(0, N),
5623 KOKKOS_LAMBDA (
const size_t i) {
5624 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5630 std::ostringstream os;
5631 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5632 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5633 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5634 std::cerr << os.str();
// The number of row offsets must be unchanged before they are installed.
5635 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5636 row_ptr_beg.extent(0) );
// Install the (possibly shifted) begin offsets as the graph's row offsets.
5638 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// CrsMatrix::copyAndPermuteStaticGraph
//
// Copies rows of the source row matrix into this matrix when this matrix has
// a static graph, so entries are combined into existing slots through
// combineGlobalValues.
//
//   srcMat          source matrix.
//   numSameIDs      number of leading local rows with the same local index
//                   in source and target.
//   permuteToLIDs   target local row index of each permuted row.
//   permuteFromLIDs source local row index of each permuted row.
//   numPermutes     length of the two permute arrays.
//
// A locally indexed source is read with getGlobalRowCopy into scratch
// buffers that are reused across rows; otherwise getGlobalRowView is used
// and no copy is made.
//
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy
// disagree on a row's length.
5641 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5643 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5644 copyAndPermuteStaticGraph(
5645 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5646 const size_t numSameIDs,
5647 const LocalOrdinal permuteToLIDs[],
5648 const LocalOrdinal permuteFromLIDs[],
5649 const size_t numPermutes)
5651 using Details::ProfilingRegion;
5652 using Teuchos::Array;
5653 using Teuchos::ArrayView;
5655 using LO = LocalOrdinal;
5656 using GO = GlobalOrdinal;
5657 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5658 const char suffix[] =
5659 " Please report this bug to the Tpetra developers.";
5660 ProfilingRegion regionCAP
5661 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
// Verbose-output prefix. The class name is "CrsMatrix" so the prefix agrees
// with the ProfilingRegion label above.
5665 std::unique_ptr<std::string> prefix;
5667 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
5668 std::ostringstream os;
5669 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for the combine call; null when not verbose.
5671 const char*
const prefix_raw =
5672 verbose ? prefix.get()->c_str() :
nullptr;
5674 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
// Scratch buffers for row copies, grown on demand and reused by both loops.
5679 const map_type& srcRowMap = * (srcMat.getRowMap ());
5680 nonconst_global_inds_host_view_type rowInds;
5681 nonconst_values_host_view_type rowVals;
5682 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
// Pass 1: the "same" rows, where the target global row equals the source's.
5683 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5687 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5688 const GO targetGID = sourceGID;
5690 ArrayView<const GO>rowIndsConstView;
5691 ArrayView<const Scalar> rowValsConstView;
5693 if (sourceIsLocallyIndexed) {
// Grow the scratch buffers only when this row is longer than any seen so far.
5694 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5695 if (rowLength > static_cast<size_t> (rowInds.size())) {
5696 Kokkos::resize(rowInds,rowLength);
5697 Kokkos::resize(rowVals,rowLength);
// Subviews of exactly rowLength entries over the scratch buffers.
5701 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5702 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5707 size_t checkRowLength = 0;
5708 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5709 rowValsView, checkRowLength);
// The message starts with ": " because tfecfFuncName has no trailing
// separator.
5711 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5712 (rowLength != checkRowLength, std::logic_error,
": For "
5713 "global row index " << sourceGID <<
", the source "
5714 "matrix's getNumEntriesInGlobalRow returns a row length "
5715 "of " << rowLength <<
", but getGlobalRowCopy reports "
5716 "a row length of " << checkRowLength <<
"." << suffix);
// Wrap the copied row as Teuchos ArrayViews; values are reinterpreted from
// the stored value type to Scalar.
5723 rowIndsConstView = Teuchos::ArrayView<const GO> (
5724 rowIndsView.data(), rowIndsView.extent(0),
5725 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5726 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5727 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5728 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Source not locally indexed: view the row in place.
5733 global_inds_host_view_type rowIndsView;
5734 values_host_view_type rowValsView;
5735 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5740 rowIndsConstView = Teuchos::ArrayView<const GO> (
5741 rowIndsView.data(), rowIndsView.extent(0),
5742 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5743 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5744 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5745 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Combine the source row into the target row.
5753 combineGlobalValues(targetGID, rowIndsConstView,
5755 prefix_raw, debug, verbose);
5759 std::ostringstream os;
5760 os << *prefix <<
"Do permutes" << endl;
// Pass 2: permuted rows. Source and target global rows come from their own
// row Maps via the permute arrays.
5763 const map_type& tgtRowMap = * (this->getRowMap ());
5764 for (
size_t p = 0; p < numPermutes; ++p) {
5765 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5766 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5768 ArrayView<const GO> rowIndsConstView;
5769 ArrayView<const Scalar> rowValsConstView;
5771 if (sourceIsLocallyIndexed) {
5772 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5773 if (rowLength > static_cast<size_t> (rowInds.size ())) {
5774 Kokkos::resize(rowInds,rowLength);
5775 Kokkos::resize(rowVals,rowLength);
5779 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5780 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5785 size_t checkRowLength = 0;
5786 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5787 rowValsView, checkRowLength);
// Same separator as in pass 1.
5789 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5790 (rowLength != checkRowLength, std::logic_error,
": For "
5791 "source matrix global row index " << sourceGID <<
", "
5792 "getNumEntriesInGlobalRow returns a row length of " <<
5793 rowLength <<
", but getGlobalRowCopy a row length of "
5794 << checkRowLength <<
"." << suffix);
5801 rowIndsConstView = Teuchos::ArrayView<const GO> (
5802 rowIndsView.data(), rowIndsView.extent(0),
5803 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5804 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5805 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5806 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5811 global_inds_host_view_type rowIndsView;
5812 values_host_view_type rowValsView;
5813 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5818 rowIndsConstView = Teuchos::ArrayView<const GO> (
5819 rowIndsView.data(), rowIndsView.extent(0),
5820 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5821 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5822 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5823 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5828 combineGlobalValues(targetGID, rowIndsConstView,
5830 prefix_raw, debug, verbose);
5834 std::ostringstream os;
5835 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermuteNonStaticGraph
//
// Copies rows of the source row matrix into this matrix when this matrix
// owns a graph that can still grow. The storage is padded first, then rows
// are inserted through insertGlobalValuesFilteredChecked.
//
//   srcMat             source matrix.
//   numSameIDs         number of leading local rows with the same local
//                      index in source and target.
//   permuteToLIDs_dv   target local row index of each permuted row.
//   permuteFromLIDs_dv source local row index of each permuted row.
//   numPermutes        number of permuted rows.
//
// Both permute lists are read through their host views.
//
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy
// disagree on a row's length.
5839 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5841 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5842 copyAndPermuteNonStaticGraph(
5843 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5844 const size_t numSameIDs,
5845 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5846 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5847 const size_t numPermutes)
5849 using Details::ProfilingRegion;
5850 using Teuchos::Array;
5851 using Teuchos::ArrayView;
5853 using LO = LocalOrdinal;
5854 using GO = GlobalOrdinal;
5855 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5856 const char suffix[] =
5857 " Please report this bug to the Tpetra developers.";
5858 ProfilingRegion regionCAP
5859 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
// Verbose-output prefix. The class name is "CrsMatrix" so the prefix agrees
// with the ProfilingRegion label above.
5863 std::unique_ptr<std::string> prefix;
5865 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
5866 std::ostringstream os;
5867 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for the insert calls; null when not verbose.
5869 const char*
const prefix_raw =
5870 verbose ? prefix.get()->c_str() :
nullptr;
// Compute the padding the incoming rows need from the source graph, then
// apply it to this matrix's storage.
5873 using row_graph_type = RowGraph<LO, GO, Node>;
5874 const row_graph_type& srcGraph = *(srcMat.getGraph());
5876 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5877 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5878 applyCrsPadding(*padding, verbose);
5880 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5885 const map_type& srcRowMap = * (srcMat.getRowMap ());
5886 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5887 using gids_type = nonconst_global_inds_host_view_type;
5888 using vals_type = nonconst_values_host_view_type;
// Pass 1: the "same" rows, where the target global row equals the source's.
5891 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5895 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5896 const GO targetGID = sourceGID;
5898 ArrayView<const GO> rowIndsConstView;
5899 ArrayView<const Scalar> rowValsConstView;
5901 if (sourceIsLocallyIndexed) {
// Grow the scratch buffers only when this row is longer than any seen so far.
5903 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5904 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5905 Kokkos::resize(rowInds,rowLength);
5906 Kokkos::resize(rowVals,rowLength);
// Subviews of exactly rowLength entries over the scratch buffers.
5910 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5911 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5916 size_t checkRowLength = 0;
5917 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5921 (rowLength != checkRowLength, std::logic_error,
": For "
5922 "global row index " << sourceGID <<
", the source "
5923 "matrix's getNumEntriesInGlobalRow returns a row length "
5924 "of " << rowLength <<
", but getGlobalRowCopy reports "
5925 "a row length of " << checkRowLength <<
"." << suffix);
5927 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5928 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
// Source not locally indexed: view the row in place.
5931 global_inds_host_view_type rowIndsView;
5932 values_host_view_type rowValsView;
5933 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5939 rowIndsConstView = Teuchos::ArrayView<const GO> (
5940 rowIndsView.data(), rowIndsView.extent(0),
5941 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5942 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5943 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5944 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Insert the source row into the target row.
5950 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5951 rowValsConstView, prefix_raw, debug, verbose);
5955 std::ostringstream os;
5956 os << *prefix <<
"Do permutes" << endl;
// Host pointers to the permute lists, read directly in the loop below.
5958 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5959 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
// Pass 2: permuted rows. Source and target global rows come from their own
// row Maps.
5961 const map_type& tgtRowMap = * (this->getRowMap ());
5962 for (
size_t p = 0; p < numPermutes; ++p) {
5963 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5964 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5966 ArrayView<const GO> rowIndsConstView;
5967 ArrayView<const Scalar> rowValsConstView;
5969 if (sourceIsLocallyIndexed) {
5970 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5971 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5972 Kokkos::resize(rowInds,rowLength);
5973 Kokkos::resize(rowVals,rowLength);
5977 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5978 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5983 size_t checkRowLength = 0;
5984 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5985 rowValsView, checkRowLength);
// Starts with ": " to match the check in pass 1, since tfecfFuncName has no
// trailing separator.
5987 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5988 (rowLength != checkRowLength, std::logic_error,
": For "
5989 "source matrix global row index " << sourceGID <<
", "
5990 "getNumEntriesInGlobalRow returns a row length of " <<
5991 rowLength <<
", but getGlobalRowCopy a row length of "
5992 << checkRowLength <<
"." << suffix);
5994 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5995 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
5998 global_inds_host_view_type rowIndsView;
5999 values_host_view_type rowValsView;
6000 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6006 rowIndsConstView = Teuchos::ArrayView<const GO> (
6007 rowIndsView.data(), rowIndsView.extent(0),
6008 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6009 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6010 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6011 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6017 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6018 rowValsConstView, prefix_raw, debug, verbose);
6022 std::ostringstream os;
6023 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermute (name taken from tfecfFuncName below; the
// signature is only partly visible in this excerpt).
//
// Entry point for the copy-and-permute step. It checks that the two permute
// lists have the same length, casts the source object to a row matrix, and
// dispatches to the static-graph or non-static-graph implementation.
//
//   numSameIDs      number of leading local rows with the same local index
//                   in source and target.
//   permuteToLIDs   target local row indices of the permuted rows.
//   permuteFromLIDs source local row indices of the permuted rows.
//
// Throws std::invalid_argument if the permute lists differ in length.
// The dynamic_cast to a reference throws std::bad_cast if the source object
// is not of type RMT.
6027 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6032 const size_t numSameIDs,
6033 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6034 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6043 const char tfecfFuncName[] =
"copyAndPermute: ";
6044 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
// Verbose mode writes progress messages to std::cerr.
6046 const bool verbose = Behavior::verbose(
"CrsMatrix");
6047 std::unique_ptr<std::string> prefix;
6049 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6050 std::ostringstream os;
6051 os << *prefix << endl
6052 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6053 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6062 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6064 std::cerr << os.str ();
// The two permute lists must pair up one-to-one. The message puts a space on
// both sides of "!=" so the printed number does not run into the operator.
6067 const auto numPermute = permuteToLIDs.extent (0);
6068 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6069 (numPermute != permuteFromLIDs.extent (0),
6070 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6071 << numPermute <<
" != permuteFromLIDs.extent(0) = "
6072 << permuteFromLIDs.extent (0) <<
".");
6077 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
// Static graph: the implementation takes raw host pointers, so both lists
// must already be current on host.
6078 if (isStaticGraph ()) {
6079 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6080 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6081 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6082 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6084 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6085 permuteToLIDs_h.data(),
6086 permuteFromLIDs_h.data(),
// Non-static graph: the DualViews are passed through unchanged.
6090 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6091 permuteFromLIDs, numPermute);
6095 std::ostringstream os;
6096 os << *prefix <<
"Done" << endl;
6097 std::cerr << os.str();
// CrsMatrix::packAndPrepare (name taken from tfecfFuncName below; the
// signature is only partly visible in this excerpt).
//
// Packs the rows listed in `exportLIDs` of the source object into `exports`.
// A source that is a CrsMatrix of this exact type is packed with packNew.
// Any other source must be a row_matrix_type; it is packed with pack() into a
// temporary Teuchos::Array, and `exports` is reallocated if it is too small
// to hold the result.
//
//   exportLIDs         local row indices to pack.
//   exports            output byte buffer.
//   numPacketsPerLID   output per-row packet counts.
//   constantNumPackets output, forwarded to packNew / pack.
//
// Exceptions thrown by packNew / pack are caught and their text is kept in
// `msg`, tagged with this process' rank. Failure is then agreed on across the
// communicator with a max-reduction.
//
// Throws std::invalid_argument if the source is neither kind of matrix.
// Throws std::logic_error if packing failed on any participating process.
6101 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6106 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6107 Kokkos::DualView<char*, buffer_device_type>& exports,
6108 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6109 size_t& constantNumPackets)
6114 using Teuchos::outArg;
6115 using Teuchos::REDUCE_MAX;
6116 using Teuchos::reduceAll;
6118 typedef LocalOrdinal LO;
6119 typedef GlobalOrdinal GO;
6120 const char tfecfFuncName[] =
"packAndPrepare: ";
6121 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6123 const bool debug = Behavior::debug(
"CrsMatrix");
6124 const bool verbose = Behavior::verbose(
"CrsMatrix");
// A null communicator is tested before it is dereferenced (the action taken
// is on a line not shown).
6127 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6128 if (pComm.is_null ()) {
6131 const Teuchos::Comm<int>& comm = *pComm;
// The rank of this process, not the communicator size: myRank is printed as
// "Proc <myRank>" in the error messages collected below.
6132 const int myRank = comm.getRank ();
6134 std::unique_ptr<std::string> prefix;
6136 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6137 std::ostringstream os;
6138 os << *prefix <<
"Start" << endl
6148 std::cerr << os.str ();
// Collects the text of any exception caught while packing on this process.
6171 std::ostringstream msg;
// Fast path: the source is a CrsMatrix with the same template parameters.
6174 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6175 const crs_matrix_type* srcCrsMat =
6176 dynamic_cast<const crs_matrix_type*
> (&source);
6177 if (srcCrsMat !=
nullptr) {
6179 std::ostringstream os;
6180 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6181 "calling packNew" << endl;
6182 std::cerr << os.str ();
6185 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6186 constantNumPackets);
6188 catch (std::exception& e) {
6190 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Generic path: the source is some other row matrix.
6194 using Kokkos::HostSpace;
6195 using Kokkos::subview;
6196 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6197 using range_type = Kokkos::pair<size_t, size_t>;
6200 std::ostringstream os;
6201 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6203 std::cerr << os.str ();
6206 const row_matrix_type* srcRowMat =
6207 dynamic_cast<const row_matrix_type*
> (&source);
6208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6209 (srcRowMat ==
nullptr, std::invalid_argument,
6210 "The source object of the Import or Export operation is neither a "
6211 "CrsMatrix (with the same template parameters as the target object), "
6212 "nor a RowMatrix (with the same first four template parameters as the "
// pack() takes Teuchos array types, so the host view of exportLIDs is wrapped
// in an ArrayView; it must already be current on host.
6223 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6224 auto exportLIDs_h = exportLIDs.view_host ();
6225 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6226 exportLIDs_h.size ());
// pack() writes into this temporary array.
6230 Teuchos::Array<char> exports_a;
// numPacketsPerLID is written on host: reset its sync state and mark the host
// copy as modified before wrapping it.
6236 numPacketsPerLID.clear_sync_state ();
6237 numPacketsPerLID.modify_host ();
6238 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6239 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6240 numPacketsPerLID_h.size ());
6245 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6246 constantNumPackets);
6248 catch (std::exception& e) {
6250 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Reallocate `exports` only when it is too small for the packed data; an
// existing non-empty label is kept.
6254 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6255 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6256 const std::string oldLabel = exports.view_device().label ();
6257 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6258 exports = exports_type (newLabel, newAllocSize);
// Destination: the first newAllocSize bytes of the host view of `exports`.
6263 exports.modify_host();
6265 auto exports_h = exports.view_host ();
6266 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
// Source: an unmanaged host view over the temporary array's storage.
6270 typedef typename exports_type::t_host::execution_space HES;
6271 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6272 Kokkos::View<const char*, host_device_type>
6273 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
// Max-reduce the local failure flag so all processes agree on the outcome.
6280 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6283 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6284 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6285 "one or more participating processes.");
// Check on the local flag alone; the message includes this process' own
// error text.
6289 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6290 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6291 "or more participating processes. Here is this process' error "
6292 "message: " << msg.str ());
6296 std::ostringstream os;
6297 os << *prefix <<
"packAndPrepare: Done!" << endl
6307 std::cerr << os.str ();
// CrsMatrix::packRow
//
// Packs one row into the byte buffer `exports`, starting at `offset`.
//
//   exports          destination byte buffer.
//   offset           byte offset in `exports` where this row starts.
//   numEnt           number of entries in the row.
//   gidsIn           global column indices of the row (numEnt of them).
//   valsIn           values of the row (numEnt of them).
//   numBytesPerValue packed size of a single value, in bytes.
//
// Row layout, in order: the entry count (numEntLen bytes, computed on a line
// not shown), then the global indices (gidsLen bytes), then the values
// (valsLen bytes).
//
// Throws std::logic_error if the number of bytes written differs from the
// size computed from the layout.
// Throws std::runtime_error if PackTraits::packArray reported an error.
6311 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6313 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6314 packRow (
char exports[],
6315 const size_t offset,
6316 const size_t numEnt,
6317 const GlobalOrdinal gidsIn[],
6318 const impl_scalar_type valsIn[],
6319 const size_t numBytesPerValue)
const
6322 using Kokkos::subview;
6324 typedef LocalOrdinal LO;
6325 typedef GlobalOrdinal GO;
6326 typedef impl_scalar_type ST;
// The entry count is stored as an LO, so convert explicitly to that type.
6334 const LO numEntLO =
static_cast<LO> (numEnt);
// Byte offsets and lengths of the three sections of the packed row.
6336 const size_t numEntBeg = offset;
6338 const size_t gidsBeg = numEntBeg + numEntLen;
6339 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6340 const size_t valsBeg = gidsBeg + gidsLen;
6341 const size_t valsLen = numEnt * numBytesPerValue;
// Destination pointers for each section.
6343 char*
const numEntOut = exports + numEntBeg;
6344 char*
const gidsOut = exports + gidsBeg;
6345 char*
const valsOut = exports + valsBeg;
// Running total of bytes written, checked against the layout below.
6347 size_t numBytesOut = 0;
// packArray returns (error code, bytes written); both are accumulated.
6352 Kokkos::pair<int, size_t> p;
6353 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6354 errorCode += p.first;
6355 numBytesOut += p.second;
6357 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6358 errorCode += p.first;
6359 numBytesOut += p.second;
// The bytes written must match the size implied by the layout.
6362 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6363 TEUCHOS_TEST_FOR_EXCEPTION
6364 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6365 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6366 << expectedNumBytes <<
".");
6367 TEUCHOS_TEST_FOR_EXCEPTION
6368 (errorCode != 0, std::runtime_error,
"packRow: "
6369 "PackTraits::packArray returned a nonzero error code");
// Unpack a single packed row from the byte buffer `imports`, beginning at
// byte position `offset`.
//
// The visible arithmetic fixes the row's layout in the buffer: an entry-count
// field at `offset`, then `numEnt` global column indices, then `numEnt`
// values taking `numBytesPerValue` bytes each.  The indices are written to
// `gidsOut` and the values to `valsOut`.
//
// Throws std::logic_error if the byte and entry counts are inconsistent with
// each other or with what was actually unpacked, and std::runtime_error if
// PackTraits::unpackArray reports a nonzero error code.
//
// NOTE(review): the return type and the return statements are not visible in
// this view; the caller in unpackAndCombineImplNonStatic compares the result
// against the row's byte count -- confirm.  The declarations of numEntLen,
// gid, numEntOut and errorCode are likewise not visible here.
6374 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6376 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6377 unpackRow (GlobalOrdinal gidsOut[],
6378 impl_scalar_type valsOut[],
6379 const char imports[],
6380 const size_t offset,
6381 const size_t numBytes,
6382 const size_t numEnt,
6383 const size_t numBytesPerValue)
6386 using Kokkos::subview;
6388 typedef LocalOrdinal LO;
6389 typedef GlobalOrdinal GO;
6390 typedef impl_scalar_type ST;
6392 Details::ProfilingRegion region_upack_row(
6393 "Tpetra::CrsMatrix::unpackRow",
// Zero bytes for this row: the error below reports numBytes == 0 paired with
// a nonzero numEnt (the guard around the throw is not visible in this view).
6397 if (numBytes == 0) {
6400 const int myRank = this->getMap ()->getComm ()->getRank ();
6401 TEUCHOS_TEST_FOR_EXCEPTION
6402 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6403 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6404 "number of entries to unpack (as reported by numPacketsPerLID) "
6405 "for this row numEnt=" << numEnt <<
" != 0.");
// The opposite inconsistency: no entries reported, yet bytes to unpack.
6410 if (numEnt == 0 && numBytes != 0) {
6411 const int myRank = this->getMap ()->getComm ()->getRank ();
6412 TEUCHOS_TEST_FOR_EXCEPTION
6413 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6414 "unpackRow: The number of entries to unpack (as reported by "
6415 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6416 "numBytes=" << numBytes <<
" != 0.");
// Byte positions and lengths of the packed row's fields inside `imports`.
6422 const size_t numEntBeg = offset;
6424 const size_t gidsBeg = numEntBeg + numEntLen;
6425 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6426 const size_t valsBeg = gidsBeg + gidsLen;
6427 const size_t valsLen = numEnt * numBytesPerValue;
6429 const char*
const numEntIn = imports + numEntBeg;
6430 const char*
const gidsIn = imports + gidsBeg;
6431 const char*
const valsIn = imports + valsBeg;
6433 size_t numBytesOut = 0;
// The entry count read from the buffer (numEntOut) must equal numEnt and be
// nonzero; the message names whichever of the two conditions failed.
6437 if (static_cast<size_t> (numEntOut) != numEnt ||
6438 numEntOut == static_cast<LO> (0)) {
6439 const int myRank = this->getMap ()->getComm ()->getRank ();
6440 std::ostringstream os;
6441 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6442 bool firstErrorCondition =
false;
6443 if (static_cast<size_t> (numEntOut) != numEnt) {
6444 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6445 <<
" does not equal number of entries unpacked from imports "
6446 "buffer numEntOut=" << numEntOut <<
".";
6447 firstErrorCondition =
true;
6449 if (numEntOut == static_cast<LO> (0)) {
6450 if (firstErrorCondition) {
6453 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6454 "but number of bytes to unpack for this row numBytes=" << numBytes
6455 <<
" != 0. This should never happen, since packRow should only "
6456 "ever pack rows with a nonzero number of entries. In this case, "
6457 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6460 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
// Unpack the global column indices, then the values; each call contributes
// an error code (p.first) and a byte count (p.second).
6464 Kokkos::pair<int, size_t> p;
6465 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6466 errorCode += p.first;
6467 numBytesOut += p.second;
6469 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6470 errorCode += p.first;
6471 numBytesOut += p.second;
// The bytes consumed must match both the caller-supplied numBytes and the
// size computed from the field lengths.
6474 TEUCHOS_TEST_FOR_EXCEPTION
6475 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6476 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6478 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6479 TEUCHOS_TEST_FOR_EXCEPTION
6480 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6481 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6482 << expectedNumBytes <<
".");
6484 TEUCHOS_TEST_FOR_EXCEPTION
6485 (errorCode != 0, std::runtime_error,
"unpackRow: "
6486 "PackTraits::unpackArray returned a nonzero error code");
// Count the matrix entries in the rows listed in `exportLIDs` and make sure
// the `exports` buffer is large enough to pack them.
//
// On output, `totalNumEntries` is the sum of getNumEntriesInLocalRow over the
// listed rows.  The required size in bytes is one LocalOrdinal per exported
// row plus one (impl_scalar_type, GlobalOrdinal) pair per entry.  `exports`
// is reallocated only when its current extent is smaller than that.
//
// `exportLIDs` is read through its host view, so it must not need a sync to
// host (asserted below).  When verbose output is enabled, "Before:" and
// "After:" diagnostics are written to std::cerr.
6491 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6493 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6494 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6495 size_t& totalNumEntries,
6496 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6498 using Details::Behavior;
6501 typedef impl_scalar_type IST;
6502 typedef LocalOrdinal LO;
6503 typedef GlobalOrdinal GO;
6509 const bool verbose = Behavior::verbose(
"CrsMatrix");
6510 std::unique_ptr<std::string> prefix;
6512 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6513 std::ostringstream os;
6514 os << *prefix <<
"Before:"
6522 std::cerr << os.str ();
6527 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6529 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6530 auto exportLIDs_h = exportLIDs.view_host ();
// Sum the entry counts of all rows to be exported.
6533 totalNumEntries = 0;
6534 for (LO i = 0; i < numExportLIDs; ++i) {
6535 const LO lclRow = exportLIDs_h[i];
6536 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
// NOTE(review): the handling of the "invalid" sentinel is not visible in
// this view; presumably such rows are counted as having zero entries.
6539 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6542 totalNumEntries += curNumEntries;
// Bytes needed: one LO per row plus one (value, global index) pair per entry.
6553 const size_t allocSize =
6554 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6555 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
6556 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
6557 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
// Keep the old buffer's label; fall back to "exports" if it had none.
6559 const std::string oldLabel = exports.view_device().label ();
6560 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6561 exports = exports_type (newLabel, allocSize);
6565 std::ostringstream os;
6566 os << *prefix <<
"After:"
6574 std::cerr << os.str ();
// Pack the rows listed in `exportLIDs` into `exports`, choosing the packing
// routine by whether this matrix has a static graph.
//
// NOTE(review): the call made on the static-graph branch is only partly
// visible in this view (just its final argument, constantNumPackets).  The
// other visible call forwards all four arguments to packNonStaticNew.
6578 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6581 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6582 Kokkos::DualView<char*, buffer_device_type>& exports,
6583 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6584 size_t& constantNumPackets)
const
6588 if (this->isStaticGraph ()) {
6591 constantNumPackets);
6594 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6595 constantNumPackets);
// Host-side packing of the rows listed in `exportLIDs` into `exports`.
//
// Sets `constantNumPackets` to 0, sizes `exports` through
// allocatePackSpaceNew, then packs each row with packRow.  For a locally
// indexed matrix the row's local column indices are first converted to global
// indices through the column Map; for a globally indexed matrix the global
// row view is packed directly.  `numPacketsPerLID` is written on host: each
// entry is set to 0 or to the number of bytes packed for that row
// (NOTE(review): the guard selecting the zero case is not visible here,
// presumably rows with no entries -- confirm).
//
// Throws std::invalid_argument if exportLIDs and numPacketsPerLID differ in
// length, and std::logic_error if a row would be packed past the end of
// `exports`.
//
// NOTE(review): the declaration and updates of `offset` are not visible in
// this view.
6599 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6602 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6603 Kokkos::DualView<char*, buffer_device_type>& exports,
6604 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6605 size_t& constantNumPackets)
const
6613 using LO = LocalOrdinal;
6614 using GO = GlobalOrdinal;
6615 using ST = impl_scalar_type;
6616 const char tfecfFuncName[] =
"packNonStaticNew: ";
6618 const bool verbose = Behavior::verbose(
"CrsMatrix");
6619 std::unique_ptr<std::string> prefix;
6621 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6622 std::ostringstream os;
6623 os << *prefix <<
"Start" << endl;
6624 std::cerr << os.str ();
6627 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6629 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
6630 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6631 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6637 constantNumPackets = 0;
// Size the export buffer for all rows to be sent.
6642 size_t totalNumEntries = 0;
6643 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6644 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
// Packing writes the host view, so reset the sync state and mark the host
// copy as modified.
6647 exports.clear_sync_state();
6648 exports.modify_host();
6649 auto exports_h = exports.view_host ();
6651 std::ostringstream os;
6652 os << *prefix <<
"After marking exports as modified on host, "
6654 std::cerr << os.str ();
6658 auto exportLIDs_h = exportLIDs.view_host ();
// numPacketsPerLID is passed by const reference; constness is cast away so
// that it can be marked as modified on host before being written below.
6661 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6662 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6663 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6668 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
// Scratch space for converted global column indices; it is set up only in
// the locally indexed case.
6672 typename global_inds_host_view_type::non_const_type gidsIn_k;
6673 if (this->isLocallyIndexed()) {
6675 typename global_inds_host_view_type::non_const_type(
"packGids",
// Pack the rows one at a time.
6680 for (
size_t i = 0; i < numExportLIDs; ++i) {
6681 const LO lclRow = exportLIDs_h[i];
6683 size_t numBytes = 0;
6684 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6691 numPacketsPerLID_h[i] = 0;
6695 if (this->isLocallyIndexed ()) {
6696 typename global_inds_host_view_type::non_const_type gidsIn;
6697 values_host_view_type valsIn;
6701 local_inds_host_view_type lidsIn;
6702 this->getLocalRowView (lclRow, lidsIn, valsIn);
// Convert the row's local column indices to global ones via the column Map.
6703 const map_type& colMap = * (this->getColMap ());
6704 for (
size_t k = 0; k < numEnt; ++k) {
6705 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6707 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
// The per-value byte count is taken from the row's first value.
6709 const size_t numBytesPerValue =
6710 PackTraits<ST>::packValueCount (valsIn[0]);
6711 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6712 gidsIn.data (), valsIn.data (),
6715 else if (this->isGloballyIndexed ()) {
6716 global_inds_host_view_type gidsIn;
6717 values_host_view_type valsIn;
// The global row view is looked up by global row index.
6723 const map_type& rowMap = * (this->getRowMap ());
6724 const GO gblRow = rowMap.getGlobalElement (lclRow);
6725 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6727 const size_t numBytesPerValue =
6728 PackTraits<ST>::packValueCount (valsIn[0]);
6729 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6730 gidsIn.data (), valsIn.data (),
// Check that the row just packed lies within the export buffer.
6737 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6738 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6739 "First invalid offset into 'exports' pack buffer at index i = " << i
6740 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6741 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
// Record this row's byte count.
6746 numPacketsPerLID_h[i] = numBytes;
6751 std::ostringstream os;
6752 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6759 std::cerr << os.str ();
// Raw-pointer front end to combineGlobalValues.
//
// Translates local row `lclRow` to its global index with the graph's row Map,
// wraps `cols` and `vals` (each of length `numEnt`) in Teuchos::ArrayViews --
// using a null pointer when numEnt is 0 -- and forwards everything to
// combineGlobalValues.  `vals` is reinterpreted from impl_scalar_type to
// Scalar for the ArrayView.
//
// NOTE(review): the return type and the declarations of combMode, debug and
// verbose are on lines not visible in this view.
6763 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6765 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6766 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6767 const LocalOrdinal numEnt,
6768 const impl_scalar_type vals[],
6769 const GlobalOrdinal cols[],
6771 const char*
const prefix,
6775 using GO = GlobalOrdinal;
6779 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
6780 Teuchos::ArrayView<const GO> cols_av
6781 (numEnt == 0 ?
nullptr : cols, numEnt);
6782 Teuchos::ArrayView<const Scalar> vals_av
6783 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
6788 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6789 prefix, debug, verbose);
// Combine the given (column index, value) pairs into global row
// `globalRowIndex` according to `combineMode`.
//
// Static graph:
//   ADD     -> sumIntoGlobalValues
//   REPLACE -> replaceGlobalValues
//   ABSMAX  -> transformGlobalValues with Details::AbsMax
//   INSERT  -> throws std::invalid_argument (forbidden with a static graph)
//   other   -> throws std::logic_error
//
// Otherwise (the error messages call this the dynamic-graph case):
//   ADD, INSERT -> insertGlobalValuesFilteredChecked
//   ABSMAX, REPLACE -> throw std::logic_error
//   other   -> throws std::logic_error
//
// NOTE(review): the return type and the declarations of combineMode, debug
// and verbose are on lines not visible in this view.
6793 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6795 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6796 combineGlobalValues(
6797 const GlobalOrdinal globalRowIndex,
6798 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6799 const Teuchos::ArrayView<const Scalar>& values,
6801 const char*
const prefix,
6805 const char tfecfFuncName[] =
"combineGlobalValues: ";
// Static graph: the structure cannot change, so only existing entries are
// modified.
6807 if (isStaticGraph ()) {
6811 if (combineMode ==
ADD) {
6812 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6814 else if (combineMode ==
REPLACE) {
6815 replaceGlobalValues (globalRowIndex, columnIndices, values);
6817 else if (combineMode ==
ABSMAX) {
6818 using ::Tpetra::Details::AbsMax;
6820 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6824 else if (combineMode ==
INSERT) {
// Always throws here: both conditions already hold on this branch.
6825 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6826 (isStaticGraph() && combineMode ==
INSERT,
6827 std::invalid_argument,
"INSERT combine mode is forbidden "
6828 "if the matrix has a static (const) graph (i.e., was "
6829 "constructed with the CrsMatrix constructor that takes a "
6830 "const CrsGraph pointer).");
6833 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6834 (
true, std::logic_error,
"Invalid combine mode; should "
6836 "Please report this bug to the Tpetra developers.");
// Remaining case: ADD and INSERT take the same insertion path.
6840 if (combineMode ==
ADD || combineMode ==
INSERT) {
6847 insertGlobalValuesFilteredChecked(globalRowIndex,
6848 columnIndices, values, prefix, debug, verbose);
6859 else if (combineMode ==
ABSMAX) {
6860 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6861 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6862 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6865 else if (combineMode ==
REPLACE) {
6866 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6867 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6868 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6872 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6873 true, std::logic_error,
"Should never get here! Please report this "
6874 "bug to the Tpetra developers.");
// Validate the arguments, then unpack received data into this matrix by
// calling unpackAndCombineImpl.
//
// Throws std::invalid_argument if combineMode is not one of the valid modes
// (listed by name as ADD, REPLACE, ABSMAX, INSERT, ZERO) or if importLIDs and
// numPacketsPerLID differ in extent.
//
// Two calls to unpackAndCombineImpl are visible.  The first is wrapped in a
// try/catch; after it, a local error flag is max-reduced over the
// communicator, per-process messages are gathered with gathervPrint, and
// std::logic_error is thrown.  The second call has no such wrapping.
//
// NOTE(review): the function name line, the combineMode parameter, the
// validModes array and the conditions selecting between the two calls are
// not visible in this view (presumably the checked path is the debug one --
// confirm).
6879 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6883 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6884 Kokkos::DualView<char*, buffer_device_type> imports,
6885 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6886 const size_t constantNumPackets,
6893 const char tfecfFuncName[] =
"unpackAndCombine: ";
6894 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6896 const bool debug = Behavior::debug(
"CrsMatrix");
6897 const bool verbose = Behavior::verbose(
"CrsMatrix");
6898 constexpr
int numValidModes = 5;
// Names used only to build the "invalid combine mode" error message.
6901 const char* validModeNames[numValidModes] =
6902 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6904 std::unique_ptr<std::string> prefix;
6906 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6907 std::ostringstream os;
6908 os << *prefix <<
"Start:" << endl
6918 << *prefix <<
" constantNumPackets: " << constantNumPackets
6922 std::cerr << os.str ();
// Reject combine modes that are not in the list of valid modes.
6926 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6927 validModes+numValidModes) {
6928 std::ostringstream os;
6929 os <<
"Invalid combine mode. Valid modes are {";
6930 for (
int k = 0; k < numValidModes; ++k) {
6931 os << validModeNames[k];
6932 if (k < numValidModes - 1) {
6937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6938 (
true, std::invalid_argument, os.str ());
6940 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6941 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6942 std::invalid_argument,
"importLIDs.extent(0)="
6943 << importLIDs.extent(0)
6944 <<
" != numPacketsPerLID.extent(0)="
6945 << numPacketsPerLID.extent(0) <<
".");
// NOTE(review): the body of the ZERO case is not visible in this view.
6948 if (combineMode ==
ZERO) {
6953 using Teuchos::reduceAll;
6954 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
// Checked path: exceptions from the implementation are caught here.
6957 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6958 constantNumPackets, combineMode,
6960 }
catch (std::exception& e) {
// Max-reduce the local error flag over the communicator.
6965 const Teuchos::Comm<int>& comm = * (this->getComm ());
6966 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6967 lclBad, Teuchos::outArg (gblBad));
// Gather every process's message and report them together.
6973 std::ostringstream os;
6974 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6975 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6976 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
6977 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6978 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6979 "threw an exception on one or more participating processes: "
6980 << endl << msg->str ());
// Unchecked path: call the implementation directly.
6984 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6985 constantNumPackets, combineMode,
6990 std::ostringstream os;
6991 os << *prefix <<
"Done!" << endl
7001 std::cerr << os.str ();
// Unpack `imports` and combine the received rows into this matrix.
//
// Static graph: delegates to Details::unpackCrsMatrixAndCombineNew.
// Otherwise: asks the graph for the padding needed by the incoming data
// (computePaddingForCrsMatrixUnpack), applies it with applyCrsPadding, and
// then calls unpackAndCombineImplNonStatic.  An exception thrown while
// computing the padding is rethrown as std::runtime_error, prefixed with this
// process's rank (or -1 if the row Map or its communicator is null).
//
// NOTE(review): the function name line and the combineMode and verbose
// parameters are not visible in this view.
7005 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7009 const Kokkos::DualView<
const local_ordinal_type*,
7010 buffer_device_type>& importLIDs,
7011 Kokkos::DualView<char*, buffer_device_type> imports,
7012 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7013 const size_t constantNumPackets,
7018 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7022 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7023 std::unique_ptr<std::string> prefix;
7025 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7026 std::ostringstream os;
7027 os << *prefix <<
"isStaticGraph(): "
7028 << (isStaticGraph() ?
"true" :
"false")
7029 <<
", importLIDs.extent(0): "
7030 << importLIDs.extent(0)
7031 <<
", imports.extent(0): "
7032 << imports.extent(0)
7033 <<
", numPacketsPerLID.extent(0): "
7034 << numPacketsPerLID.extent(0)
7036 std::cerr << os.str();
// Static graph: unpack through the Details helper.
7039 if (isStaticGraph ()) {
7040 using Details::unpackCrsMatrixAndCombineNew;
7041 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7042 importLIDs, constantNumPackets,
// Otherwise: compute the padding required by the incoming entries first.
7047 using padding_type =
typename crs_graph_type::padding_type;
7048 std::unique_ptr<padding_type> padding;
7050 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7051 importLIDs, imports, numPacketsPerLID, verbose);
7053 catch (std::exception& e) {
7054 const auto rowMap = getRowMap();
7055 const auto comm = rowMap.is_null() ? Teuchos::null :
7057 const int myRank = comm.is_null() ? -1 : comm->getRank();
7058 TEUCHOS_TEST_FOR_EXCEPTION
7059 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7060 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7061 "threw an exception: " << e.what());
7064 std::ostringstream os;
7065 os << *prefix <<
"Call applyCrsPadding" << endl;
7066 std::cerr << os.str();
7068 applyCrsPadding(*padding, verbose);
7071 std::ostringstream os;
7072 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7073 std::cerr << os.str();
7075 unpackAndCombineImplNonStatic(importLIDs, imports,
7082 std::ostringstream os;
7083 os << *prefix <<
"Done" << endl;
7084 std::cerr << os.str();
// Host-side unpack-and-combine, done in two passes over the import rows.
//
// Pass 1 checks each row's byte range against the size of `imports` and
// records the largest per-row entry count, which sizes the scratch arrays for
// global indices, local indices and values.  Pass 2 unpacks each row with
// unpackRow into the scratch arrays and hands the result to
// combineGlobalValuesRaw with the requested combine mode.
//
// `imports` and `numPacketsPerLID` are synced to host if needed; `importLIDs`
// must already be valid on host (asserted).  Consistency failures throw
// std::logic_error.
//
// NOTE(review): the return type, the combineMode parameter, the HES alias,
// the `offset` bookkeeping, and the code that reads numEntLO from the buffer
// are on lines not visible in this view.
7088 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7090 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7091 unpackAndCombineImplNonStatic(
7092 const Kokkos::DualView<
const local_ordinal_type*,
7093 buffer_device_type>& importLIDs,
7094 Kokkos::DualView<char*, buffer_device_type> imports,
7095 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7096 const size_t constantNumPackets,
7100 using Kokkos::subview;
7101 using Kokkos::MemoryUnmanaged;
7102 using Details::Behavior;
7105 using Details::PackTraits;
7106 using Details::ScalarViewTraits;
7108 using LO = LocalOrdinal;
7109 using GO = GlobalOrdinal;
7110 using ST = impl_scalar_type;
7111 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7113 typename View<int*, device_type>::HostMirror::execution_space;
7114 using pair_type = std::pair<typename View<int*, HES>::size_type,
7115 typename View<int*, HES>::size_type>;
7116 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7117 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7118 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7120 const bool debug = Behavior::debug(
"CrsMatrix");
7121 const bool verbose = Behavior::verbose(
"CrsMatrix");
7122 std::unique_ptr<std::string> prefix;
7124 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7125 std::ostringstream os;
7126 os << *prefix << endl;
7127 std::cerr << os.str ();
// A raw C string for the callee; null unless verbose output is enabled.
7129 const char*
const prefix_raw =
7130 verbose ? prefix.get()->c_str() :
nullptr;
7132 const size_type numImportLIDs = importLIDs.extent (0);
// NOTE(review): the body of this check is not visible in this view;
// presumably nothing is unpacked for ZERO mode or an empty import list.
7133 if (combineMode ==
ZERO || numImportLIDs == 0) {
7137 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7138 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// Unpacking reads the host views, so sync them first where required.
7143 if (imports.need_sync_host()) {
7144 imports.sync_host ();
7146 auto imports_h = imports.view_host();
7149 if (numPacketsPerLID.need_sync_host()) {
7150 numPacketsPerLID.sync_host ();
7152 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7154 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7155 auto importLIDs_h = importLIDs.view_host();
7157 size_t numBytesPerValue;
7168 numBytesPerValue = PackTraits<ST>::packValueCount (val);
// Pass 1: validate each row's extent in the buffer and find the largest
// per-row entry count.
7173 size_t maxRowNumEnt = 0;
7174 for (size_type i = 0; i < numImportLIDs; ++i) {
7175 const size_t numBytes = numPacketsPerLID_h[i];
7176 if (numBytes == 0) {
7181 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7182 (offset + numBytes >
size_t(imports_h.extent (0)),
7183 std::logic_error,
": At local row index importLIDs_h[i="
7184 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7185 <<
") + numBytes (=" << numBytes <<
") > "
7186 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7191 const size_t theNumBytes =
7193 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7194 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7195 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7197 const char*
const inBuf = imports_h.data () + offset;
7198 const size_t actualNumBytes =
7202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7203 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7204 <<
", actualNumBytes=" << actualNumBytes
7205 <<
" > numBytes=" << numBytes <<
".");
// A row that has bytes must carry at least one entry.
7206 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7207 (numEntLO == 0, std::logic_error,
": At local row index "
7208 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7209 "the number of entries read from the packed data is "
7210 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7214 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch arrays sized for the longest incoming row.
7222 View<GO*, HES> gblColInds;
7223 View<LO*, HES> lclColInds;
7224 View<ST*, HES> vals;
7237 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7238 gid, maxRowNumEnt,
"gids");
7239 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7240 lid, maxRowNumEnt,
"lids");
7241 vals = ScalarViewTraits<ST, HES>::allocateArray(
7242 val, maxRowNumEnt,
"vals");
// Pass 2: unpack each row into the scratch arrays and combine it into the
// matrix.
7246 for (size_type i = 0; i < numImportLIDs; ++i) {
7247 const size_t numBytes = numPacketsPerLID_h[i];
7248 if (numBytes == 0) {
7252 const char*
const inBuf = imports_h.data () + offset;
7255 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7256 const LO lclRow = importLIDs_h[i];
// Unmanaged views over the first numEnt slots of the scratch arrays.
7258 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7259 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7261 const size_t numBytesOut =
7262 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7263 offset, numBytes, numEnt, numBytesPerValue);
7264 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7265 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7266 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7267 << numBytesOut <<
".");
7269 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7270 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7271 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7272 combineMode, prefix_raw, debug, verbose);
7278 std::ostringstream os;
7279 os << *prefix <<
"Done" << endl;
7280 std::cerr << os.str();
// Provide a MultiVector distributed over this matrix's column Map, reusing
// the cached importMV_ when possible.
//
// Requires the matrix to have a column Map and its graph to be fill complete;
// otherwise throws std::runtime_error.  A column-Map MultiVector is set up
// only if the graph has an importer or `force` is true.  In that case the
// cached importMV_ is reused if it exists and has `numVecs` columns;
// otherwise a new MultiVector is created and cached in importMV_.
//
// NOTE(review): the function name line, its other parameters, the definition
// of numVecs and the return statement are not visible in this view.
7284 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7285 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7288 const bool force)
const
7290 using Teuchos::null;
7294 TEUCHOS_TEST_FOR_EXCEPTION(
7295 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7296 "MapMultiVector: You may only call this method if the matrix has a "
7297 "column Map. If the matrix does not yet have a column Map, you should "
7298 "first call fillComplete (with domain and range Map if necessary).");
7302 TEUCHOS_TEST_FOR_EXCEPTION(
7303 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7304 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7305 "this matrix's graph is fill complete.");
7308 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7309 RCP<const map_type> colMap = this->getColMap ();
// Reuse the cached MultiVector unless it is missing or has the wrong number
// of columns.
7322 if (! importer.is_null () || force) {
7323 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7324 X_colMap = rcp (
new MV (colMap, numVecs));
7327 importMV_ = X_colMap;
7330 X_colMap = importMV_;
// Provide a MultiVector distributed over this matrix's row Map, reusing the
// cached exportMV_ when possible.
//
// Requires the graph to be fill complete; otherwise throws
// std::runtime_error.  A row-Map MultiVector is set up only if the graph has
// an exporter or `force` is true.  In that case the cached exportMV_ is
// reused if it exists and has `numVecs` columns; otherwise a new MultiVector
// is created and cached in exportMV_.
//
// NOTE(review): the function name line, its other parameters, the definition
// of numVecs and the return statement are not visible in this view.
7341 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7342 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7345 const bool force)
const
7347 using Teuchos::null;
7353 TEUCHOS_TEST_FOR_EXCEPTION(
7354 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7355 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7356 "matrix's graph is fill complete.");
7359 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7363 RCP<const map_type> rowMap = this->getRowMap ();
// Reuse the cached MultiVector unless it is missing or has the wrong number
// of columns.
7375 if (! exporter.is_null () || force) {
7376 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7377 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7378 exportMV_ = Y_rowMap;
7381 Y_rowMap = exportMV_;
// Forward removeEmptyProcessesInPlace to the matrix's own (modifiable) graph
// and then refresh the members that depend on it.
//
// Throws std::logic_error if myGraph_ is null, which the error message
// attributes to a matrix created with a constant graph.  After the graph
// call, map_ is reset to the current row Map and staticGraph_ is reset from
// myGraph_.
//
// NOTE(review): the function name line and the declaration of newMap are not
// visible in this view.
7387 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7392 TEUCHOS_TEST_FOR_EXCEPTION(
7393 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7394 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7395 "was created with a constant graph (that is, when it was created using "
7396 "the version of its constructor that takes an RCP<const CrsGraph>). "
7397 "This is because the matrix is not allowed to modify the graph in that "
7398 "case, but removing empty processes requires modifying the graph.");
7399 myGraph_->removeEmptyProcessesInPlace (newMap);
// Keep this object's Map consistent with the graph's row Map.
7403 this->map_ = this->getRowMap ();
// Keep the const view of the graph pointing at the same graph.
7407 staticGraph_ = Teuchos::rcp_const_cast<
const Graph> (myGraph_);
// Build a new matrix C = alpha*A + beta*B, where B is this matrix (see the
// verbose message "Compute C = alpha*A + beta*B" and `B = *this` below).
//
// The domain and range Maps of the result are the supplied ones if nonnull,
// else B's, else A's.  std::invalid_argument is thrown if no usable Map can
// be found or if the available Maps are not the same (isSameAs).
//
// `params` may provide:
//   "Call fillComplete"       -- whether to call fillComplete on C
//                                (defaults to true),
//   "Constructor parameters"  -- sublist passed to C's constructor,
//   "fillComplete parameters" -- sublist passed to C's fillComplete.
//
// C uses B's row Map.  When A and B have the same row Map, each row of C is
// allocated with room for the sum of the two entry counts (A's contribution
// is counted only if alpha is nonzero).
//
// NOTE(review): the function name line, the A/alpha/beta parameters, the
// declarations of ind, val and A_numEntries, and the return statement are on
// lines not visible in this view.
7410 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7411 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7416 const Teuchos::RCP<const map_type>& domainMap,
7417 const Teuchos::RCP<const map_type>& rangeMap,
7418 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7420 using Teuchos::Array;
7421 using Teuchos::ArrayView;
7422 using Teuchos::ParameterList;
7425 using Teuchos::rcp_implicit_cast;
7426 using Teuchos::sublist;
7430 using crs_matrix_type =
7432 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7436 std::unique_ptr<std::string> prefix;
7438 prefix = this->createPrefix(
"CrsMatrix",
"add");
7439 std::ostringstream os;
7440 os << *prefix <<
"Start" << endl;
7441 std::cerr << os.str ();
7444 const crs_matrix_type& B = *
this;
// Note: this local ZERO is a Scalar constant, not the ZERO combine mode.
7445 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7446 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7453 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7454 RCP<const map_type> B_domainMap = B.getDomainMap ();
7455 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7457 RCP<const map_type> theDomainMap = domainMap;
7458 RCP<const map_type> theRangeMap = rangeMap;
// Choose the result's domain Map: the caller's if given, else B's, else A's.
7460 if (domainMap.is_null ()) {
7461 if (B_domainMap.is_null ()) {
7462 TEUCHOS_TEST_FOR_EXCEPTION(
7463 A_domainMap.is_null (), std::invalid_argument,
7464 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7465 "then you must supply a nonnull domain Map to this method.");
7466 theDomainMap = A_domainMap;
7468 theDomainMap = B_domainMap;
// The range Map is chosen the same way.
7471 if (rangeMap.is_null ()) {
7472 if (B_rangeMap.is_null ()) {
7473 TEUCHOS_TEST_FOR_EXCEPTION(
7474 A_rangeMap.is_null (), std::invalid_argument,
7475 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7476 "then you must supply a nonnull range Map to this method.");
7477 theRangeMap = A_rangeMap;
7479 theRangeMap = B_rangeMap;
// Consistency checks between A's Maps, B's Maps and the supplied Maps.
7487 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7488 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7489 TEUCHOS_TEST_FOR_EXCEPTION
7490 (! B_domainMap->isSameAs(*A_domainMap),
7491 std::invalid_argument,
7492 errPfx <<
"The input RowMatrix A must have a domain Map "
7493 "which is the same as (isSameAs) this RowMatrix's "
7495 TEUCHOS_TEST_FOR_EXCEPTION
7496 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7497 errPfx <<
"The input RowMatrix A must have a range Map "
7498 "which is the same as (isSameAs) this RowMatrix's range "
7500 TEUCHOS_TEST_FOR_EXCEPTION
7501 (! domainMap.is_null() &&
7502 ! domainMap->isSameAs(*B_domainMap),
7503 std::invalid_argument,
7504 errPfx <<
"The input domain Map must be the same as "
7505 "(isSameAs) this RowMatrix's domain Map.");
7506 TEUCHOS_TEST_FOR_EXCEPTION
7507 (! rangeMap.is_null() &&
7508 ! rangeMap->isSameAs(*B_rangeMap),
7509 std::invalid_argument,
7510 errPfx <<
"The input range Map must be the same as "
7511 "(isSameAs) this RowMatrix's range Map.");
7514 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7515 TEUCHOS_TEST_FOR_EXCEPTION
7516 (! domainMap.is_null() &&
7517 ! domainMap->isSameAs(*B_domainMap),
7518 std::invalid_argument,
7519 errPfx <<
"The input domain Map must be the same as "
7520 "(isSameAs) this RowMatrix's domain Map.");
7521 TEUCHOS_TEST_FOR_EXCEPTION
7522 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7523 std::invalid_argument,
7524 errPfx <<
"The input range Map must be the same as "
7525 "(isSameAs) this RowMatrix's range Map.");
7528 TEUCHOS_TEST_FOR_EXCEPTION
7529 (domainMap.is_null() || rangeMap.is_null(),
7530 std::invalid_argument, errPfx <<
"If neither A nor B "
7531 "have a domain and range Map, then you must supply a "
7532 "nonnull domain and range Map to this method.");
// Optional parameters: whether to call fillComplete, plus sublists for C's
// constructor and for fillComplete.
7539 bool callFillComplete =
true;
7540 RCP<ParameterList> constructorSublist;
7541 RCP<ParameterList> fillCompleteSublist;
7542 if (! params.is_null()) {
7544 params->get(
"Call fillComplete", callFillComplete);
7545 constructorSublist = sublist(params,
"Constructor parameters");
7546 fillCompleteSublist = sublist(params,
"fillComplete parameters");
7549 RCP<const map_type> A_rowMap = A.
getRowMap ();
7550 RCP<const map_type> B_rowMap = B.getRowMap ();
7551 RCP<const map_type> C_rowMap = B_rowMap;
7552 RCP<crs_matrix_type> C;
// Same row Maps: bound each row of C by the sum of A's and B's entry counts.
7558 if (A_rowMap->isSameAs (*B_rowMap)) {
7559 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7560 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7563 if (alpha != ZERO) {
7564 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7566 C_maxNumEntriesPerRow[localRow] += A_numEntries;
// NOTE(review): any guard on beta around this loop is not visible here.
7571 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7572 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7573 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7577 if (constructorSublist.is_null ()) {
7578 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7580 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7581 constructorSublist));
// NOTE(review): the branch structure leading here is not visible; by its
// message this is the case where the row Maps differ.
7592 TEUCHOS_TEST_FOR_EXCEPTION
7593 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7594 "be the same for statically allocated matrices, to ensure "
7595 "that there is sufficient space to do the addition.");
7598 TEUCHOS_TEST_FOR_EXCEPTION
7599 (C.is_null (), std::logic_error,
7600 errPfx <<
"C should not be null at this point. "
7601 "Please report this bug to the Tpetra developers.");
7604 std::ostringstream os;
7605 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7606 std::cerr << os.str ();
7608 using gids_type = nonconst_global_inds_host_view_type;
7609 using vals_type = nonconst_values_host_view_type;
// Insert alpha * A row by row; skipped entirely when alpha is zero.
7613 if (alpha != ZERO) {
7614 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7615 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7617 const GO globalRow = A_rowMap->getGlobalElement (localRow);
// Grow the scratch buffers when this row is longer than any seen so far.
7618 if (A_numEntries > static_cast<size_t> (ind.size ())) {
7619 Kokkos::resize(ind,A_numEntries);
7620 Kokkos::resize(val,A_numEntries);
7622 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7623 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7627 for (
size_t k = 0; k < A_numEntries; ++k) {
7628 valView[k] *= alpha;
7631 C->insertGlobalValues (globalRow, A_numEntries,
7632 reinterpret_cast<Scalar *>(valView.data()),
// Insert B's rows.  NOTE(review): the scaling applied inside the inner loop
// (presumably by beta) and any guard on beta are not visible in this view.
7638 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7639 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7640 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7641 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7642 if (B_numEntries > static_cast<size_t> (ind.size ())) {
7643 Kokkos::resize(ind,B_numEntries);
7644 Kokkos::resize(val,B_numEntries);
7646 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7647 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7648 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7651 for (
size_t k = 0; k < B_numEntries; ++k) {
7655 C->insertGlobalValues (globalRow, B_numEntries,
7656 reinterpret_cast<Scalar *>(valView.data()),
// Optionally finish C with the chosen domain and range Maps.
7661 if (callFillComplete) {
7663 std::ostringstream os;
7664 os << *prefix <<
"Call fillComplete on C" << endl;
7665 std::cerr << os.str ();
7667 if (fillCompleteSublist.is_null ()) {
7668 C->fillComplete (theDomainMap, theRangeMap);
7670 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7674 std::ostringstream os;
7675 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7676 std::cerr << os.str ();
7680 std::ostringstream os;
7681 os << *prefix <<
"Done" << endl;
7682 std::cerr << os.str ();
7689 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7693 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7694 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7695 const Teuchos::RCP<const map_type>& domainMap,
7696 const Teuchos::RCP<const map_type>& rangeMap,
7697 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7704 using Teuchos::ArrayRCP;
7705 using Teuchos::ArrayView;
7706 using Teuchos::Comm;
7707 using Teuchos::ParameterList;
7710 typedef LocalOrdinal LO;
7711 typedef GlobalOrdinal GO;
7712 typedef node_type NT;
7717 const bool debug = Behavior::debug(
"CrsMatrix");
7718 const bool verbose = Behavior::verbose(
"CrsMatrix");
7719 int MyPID = getComm ()->getRank ();
7721 std::unique_ptr<std::string> verbosePrefix;
7724 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7725 std::ostringstream os;
7726 os <<
"Start" << endl;
7727 std::cerr << os.str();
7734 bool reverseMode =
false;
7735 bool restrictComm =
false;
7737 int mm_optimization_core_count =
7738 Behavior::TAFC_OptimizationCoreCount();
7739 RCP<ParameterList> matrixparams;
7740 bool overrideAllreduce =
false;
7741 bool useKokkosPath =
false;
7742 if (! params.is_null ()) {
7743 matrixparams = sublist (params,
"CrsMatrix");
7744 reverseMode = params->get (
"Reverse Mode", reverseMode);
7745 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7746 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7747 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7748 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7749 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7751 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7752 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7753 if(reverseMode) isMM =
false;
7757 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7759 int reduced_mismatch = 0;
7760 if (isMM && !overrideAllreduce) {
7763 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7764 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7765 rowTransfer.getRemoteLIDs ().size() == 0);
7766 mismatch = (source_vals != target_vals) ? 1 : 0;
7769 Teuchos::REDUCE_MAX, * (getComm ()));
7772 #ifdef HAVE_TPETRA_MMM_TIMINGS
7773 using Teuchos::TimeMonitor;
7775 if(!params.is_null())
7776 label = params->get(
"Timer Label",label);
7777 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7780 std::ostringstream os;
7781 if(isMM) os<<
":MMOpt";
7782 else os<<
":MMLegacy";
7786 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7794 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7795 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7796 TEUCHOS_TEST_FOR_EXCEPTION(
7797 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7798 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7799 "argument must be either an Import or an Export, and its template "
7800 "parameters must match the corresponding template parameters of the "
7808 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type> (domainTransfer);
7809 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type> (domainTransfer);
7811 if(! domainTransfer.is_null()) {
7812 TEUCHOS_TEST_FOR_EXCEPTION(
7813 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7814 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7815 "argument must be either an Import or an Export, and its template "
7816 "parameters must match the corresponding template parameters of the "
7819 TEUCHOS_TEST_FOR_EXCEPTION(
7820 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7821 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7822 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7823 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7824 "arguments must be of the same type (either Import or Export).");
7826 TEUCHOS_TEST_FOR_EXCEPTION(
7827 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7828 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7829 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7830 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7831 "arguments must be of the same type (either Import or Export).");
7837 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7841 RCP<const map_type> MyRowMap = reverseMode ?
7842 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7843 RCP<const map_type> MyColMap;
7844 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7845 domainMap : getDomainMap ();
7846 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7847 rangeMap : getRangeMap ();
7848 RCP<const map_type> BaseRowMap = MyRowMap;
7849 RCP<const map_type> BaseDomainMap = MyDomainMap;
7857 if (! destMat.is_null ()) {
7868 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7869 ! destMat->getGraph ()->isGloballyIndexed ();
7870 TEUCHOS_TEST_FOR_EXCEPTION(
7871 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7872 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7873 "to be nonnull, if its graph is empty (neither locally nor globally "
7882 TEUCHOS_TEST_FOR_EXCEPTION(
7883 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7884 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7885 "input argument 'destMat' is not the same as the (row) Map specified "
7886 "by the input argument 'rowTransfer'.");
7887 TEUCHOS_TEST_FOR_EXCEPTION(
7888 ! destMat->checkSizes (*
this), std::invalid_argument,
7889 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7890 "destination matrix, but checkSizes() indicates that it is not a legal "
7891 "legal target for redistribution from the source matrix (*this). This "
7892 "may mean that they do not have the same dimensions.");
7906 TEUCHOS_TEST_FOR_EXCEPTION(
7907 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7908 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7909 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7910 TEUCHOS_TEST_FOR_EXCEPTION(
7911 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7912 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7913 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7916 TEUCHOS_TEST_FOR_EXCEPTION(
7917 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7918 std::invalid_argument,
7919 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7920 "argument must be the same as the rebalanced domain map 'domainMap'");
7922 TEUCHOS_TEST_FOR_EXCEPTION(
7923 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7924 std::invalid_argument,
7925 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7926 "argument must be the same as the rebalanced domain map 'domainMap'");
7939 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7940 ArrayView<const LO> ExportLIDs = reverseMode ?
7941 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7942 auto RemoteLIDs = reverseMode ?
7943 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7944 auto PermuteToLIDs = reverseMode ?
7945 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7946 auto PermuteFromLIDs = reverseMode ?
7947 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7948 Distributor& Distor = rowTransfer.getDistributor ();
7951 Teuchos::Array<int> SourcePids;
7954 RCP<const map_type> ReducedRowMap, ReducedColMap,
7955 ReducedDomainMap, ReducedRangeMap;
7956 RCP<const Comm<int> > ReducedComm;
7960 if (destMat.is_null ()) {
7961 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7968 #ifdef HAVE_TPETRA_MMM_TIMINGS
7969 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7971 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7972 ReducedComm = ReducedRowMap.is_null () ?
7974 ReducedRowMap->getComm ();
7975 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7977 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7979 MyDomainMap->replaceCommWithSubset (ReducedComm);
7980 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7982 MyRangeMap->replaceCommWithSubset (ReducedComm);
7985 MyRowMap = ReducedRowMap;
7986 MyDomainMap = ReducedDomainMap;
7987 MyRangeMap = ReducedRangeMap;
7990 if (! ReducedComm.is_null ()) {
7991 MyPID = ReducedComm->getRank ();
7998 ReducedComm = MyRowMap->getComm ();
8007 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8010 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8012 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8013 #ifdef HAVE_TPETRA_MMM_TIMINGS
8014 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8022 Import_Util::getPids (*MyImporter, SourcePids,
false);
8024 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8027 #ifdef HAVE_TPETRA_MMM_TIMINGS
8028 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8030 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8031 IntVectorType SourceCol_pids(getColMap());
8033 SourceDomain_pids.putScalar(MyPID);
8035 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8036 SourcePids.resize (getColMap ()->getLocalNumElements ());
8037 SourceCol_pids.get1dCopy (SourcePids ());
8039 else if (MyImporter.is_null ()) {
8041 #ifdef HAVE_TPETRA_MMM_TIMINGS
8042 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8044 SourcePids.resize (getColMap ()->getLocalNumElements ());
8045 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8047 else if ( ! MyImporter.is_null () &&
8048 ! domainTransfer.is_null () ) {
8053 #ifdef HAVE_TPETRA_MMM_TIMINGS
8054 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8058 IntVectorType TargetDomain_pids (domainMap);
8059 TargetDomain_pids.putScalar (MyPID);
8062 IntVectorType SourceDomain_pids (getDomainMap ());
8065 IntVectorType SourceCol_pids (getColMap ());
8067 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8068 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8070 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8071 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8073 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8074 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8076 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8077 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8080 TEUCHOS_TEST_FOR_EXCEPTION(
8081 true, std::logic_error,
"Tpetra::CrsMatrix::"
8082 "transferAndFillComplete: Should never get here! "
8083 "Please report this bug to a Tpetra developer.");
8085 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8086 SourcePids.resize (getColMap ()->getLocalNumElements ());
8087 SourceCol_pids.get1dCopy (SourcePids ());
8089 else if ( ! MyImporter.is_null () &&
8090 BaseDomainMap->isSameAs (*BaseRowMap) &&
8091 getDomainMap ()->isSameAs (*getRowMap ())) {
8093 #ifdef HAVE_TPETRA_MMM_TIMINGS
8094 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8097 IntVectorType TargetRow_pids (domainMap);
8098 IntVectorType SourceRow_pids (getRowMap ());
8099 IntVectorType SourceCol_pids (getColMap ());
8101 TargetRow_pids.putScalar (MyPID);
8102 if (! reverseMode && xferAsImport !=
nullptr) {
8103 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8105 else if (reverseMode && xferAsExport !=
nullptr) {
8106 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8108 else if (! reverseMode && xferAsExport !=
nullptr) {
8109 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8111 else if (reverseMode && xferAsImport !=
nullptr) {
8112 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8115 TEUCHOS_TEST_FOR_EXCEPTION(
8116 true, std::logic_error,
"Tpetra::CrsMatrix::"
8117 "transferAndFillComplete: Should never get here! "
8118 "Please report this bug to a Tpetra developer.");
8121 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8122 SourcePids.resize (getColMap ()->getLocalNumElements ());
8123 SourceCol_pids.get1dCopy (SourcePids ());
8126 TEUCHOS_TEST_FOR_EXCEPTION(
8127 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8128 "transferAndFillComplete: This method only allows either domainMap == "
8129 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8130 "getDomainMap () == getRowMap ()).");
8134 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8136 #ifdef HAVE_TPETRA_MMM_TIMINGS
8137 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8139 if (constantNumPackets == 0) {
8140 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8141 RemoteLIDs.view_host().size ());
8148 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8149 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8155 #ifdef HAVE_TPETRA_MMM_TIMINGS
8156 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8159 using Teuchos::outArg;
8160 using Teuchos::REDUCE_MAX;
8161 using Teuchos::reduceAll;
8164 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8165 const int myRank = comm->getRank ();
8167 std::ostringstream errStrm;
8171 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8174 destMat->numExportPacketsPerLID_.modify_host ();
8175 numExportPacketsPerLID =
8178 catch (std::exception& e) {
8179 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8180 << e.what () << std::endl;
8184 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8185 "an exception not a subclass of std::exception" << std::endl;
8189 if (! comm.is_null ()) {
8190 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8193 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8194 TEUCHOS_TEST_FOR_EXCEPTION(
8195 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8196 "exception on at least one process.");
8200 std::ostringstream os;
8201 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8203 std::cerr << os.str ();
8208 numExportPacketsPerLID,
8211 constantNumPackets);
8213 catch (std::exception& e) {
8214 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8215 << e.what () << std::endl;
8219 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8220 "an exception not a subclass of std::exception" << std::endl;
8225 std::ostringstream os;
8226 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8228 std::cerr << os.str ();
8231 if (! comm.is_null ()) {
8232 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8235 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8236 TEUCHOS_TEST_FOR_EXCEPTION(
8237 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8238 "exception on at least one process.");
8243 destMat->numExportPacketsPerLID_.modify_host ();
8244 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8247 std::ostringstream os;
8248 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8250 std::cerr << os.str ();
8254 numExportPacketsPerLID,
8257 constantNumPackets);
8259 std::ostringstream os;
8260 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8262 std::cerr << os.str ();
8269 #ifdef HAVE_TPETRA_MMM_TIMINGS
8270 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8272 if (! communication_needed) {
8274 std::ostringstream os;
8275 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8276 std::cerr << os.str ();
8281 if (constantNumPackets == 0) {
8283 std::ostringstream os;
8284 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8286 std::cerr << os.str ();
8291 destMat->numExportPacketsPerLID_.sync_host ();
8292 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8294 destMat->numImportPacketsPerLID_.sync_host ();
8295 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8299 std::ostringstream os;
8300 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8302 std::cerr << os.str ();
8304 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8305 destMat->numImportPacketsPerLID_.view_host());
8307 std::ostringstream os;
8308 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8310 std::cerr << os.str ();
8313 size_t totalImportPackets = 0;
8314 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8315 totalImportPackets += numImportPacketsPerLID[i];
8320 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8321 verbosePrefix.get ());
8322 destMat->imports_.modify_host ();
8323 auto hostImports = destMat->imports_.view_host();
8326 destMat->exports_.sync_host ();
8327 auto hostExports = destMat->exports_.view_host();
8329 std::ostringstream os;
8330 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8332 std::cerr << os.str ();
8334 Distor.doReversePostsAndWaits (hostExports,
8335 numExportPacketsPerLID,
8337 numImportPacketsPerLID);
8339 std::ostringstream os;
8340 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8342 std::cerr << os.str ();
8347 std::ostringstream os;
8348 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8350 std::cerr << os.str ();
8352 destMat->imports_.modify_host ();
8353 auto hostImports = destMat->imports_.view_host();
8356 destMat->exports_.sync_host ();
8357 auto hostExports = destMat->exports_.view_host();
8359 std::ostringstream os;
8360 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8362 std::cerr << os.str ();
8364 Distor.doReversePostsAndWaits (hostExports,
8368 std::ostringstream os;
8369 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8371 std::cerr << os.str ();
8376 if (constantNumPackets == 0) {
8378 std::ostringstream os;
8379 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8381 std::cerr << os.str ();
8386 destMat->numExportPacketsPerLID_.sync_host ();
8387 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8389 destMat->numImportPacketsPerLID_.sync_host ();
8390 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8393 std::ostringstream os;
8394 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8396 std::cerr << os.str ();
8398 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8399 destMat->numImportPacketsPerLID_.view_host());
8401 std::ostringstream os;
8402 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8404 std::cerr << os.str ();
8407 size_t totalImportPackets = 0;
8408 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8409 totalImportPackets += numImportPacketsPerLID[i];
8414 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8415 verbosePrefix.get ());
8416 destMat->imports_.modify_host ();
8417 auto hostImports = destMat->imports_.view_host();
8420 destMat->exports_.sync_host ();
8421 auto hostExports = destMat->exports_.view_host();
8423 std::ostringstream os;
8424 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8426 std::cerr << os.str ();
8428 Distor.doPostsAndWaits (hostExports,
8429 numExportPacketsPerLID,
8431 numImportPacketsPerLID);
8433 std::ostringstream os;
8434 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8436 std::cerr << os.str ();
8441 std::ostringstream os;
8442 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8444 std::cerr << os.str ();
8446 destMat->imports_.modify_host ();
8447 auto hostImports = destMat->imports_.view_host();
8450 destMat->exports_.sync_host ();
8451 auto hostExports = destMat->exports_.view_host();
8453 std::ostringstream os;
8454 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8456 std::cerr << os.str ();
8458 Distor.doPostsAndWaits (hostExports,
8462 std::ostringstream os;
8463 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8465 std::cerr << os.str ();
8476 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8478 Teuchos::Array<int> RemotePids;
8480 Teuchos::Array<int> TargetPids;
8486 destMat->numImportPacketsPerLID_.modify_host();
8488 # ifdef HAVE_TPETRA_MMM_TIMINGS
8489 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8491 ArrayRCP<size_t> CSR_rowptr;
8492 ArrayRCP<GO> CSR_colind_GID;
8493 ArrayRCP<LO> CSR_colind_LID;
8494 ArrayRCP<Scalar> CSR_vals;
8496 destMat->imports_.sync_device ();
8497 destMat->numImportPacketsPerLID_.sync_device ();
8499 size_t N = BaseRowMap->getLocalNumElements ();
8501 auto RemoteLIDs_d = RemoteLIDs.view_device();
8502 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8503 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8508 destMat->imports_.view_device(),
8509 destMat->numImportPacketsPerLID_.view_device(),
8523 if (
typeid (LO) ==
typeid (GO)) {
8524 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8527 CSR_colind_LID.resize (CSR_colind_GID.size());
8529 CSR_colind_LID.resize (CSR_colind_GID.size());
8534 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8536 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8538 #ifdef HAVE_TPETRA_MMM_TIMINGS
8539 tmCopySPRdata = Teuchos::null;
8548 std::ostringstream os;
8549 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8551 std::cerr << os.str ();
8554 #ifdef HAVE_TPETRA_MMM_TIMINGS
8555 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8557 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8567 std::ostringstream os;
8568 os << *verbosePrefix <<
"restrictComm="
8569 << (restrictComm ?
"true" :
"false") << std::endl;
8570 std::cerr << os.str ();
8577 #ifdef HAVE_TPETRA_MMM_TIMINGS
8578 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8581 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8583 MyColMap->replaceCommWithSubset (ReducedComm);
8584 MyColMap = ReducedColMap;
8589 std::ostringstream os;
8590 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8591 std::cerr << os.str ();
8593 destMat->replaceColMap (MyColMap);
8600 if (ReducedComm.is_null ()) {
8602 std::ostringstream os;
8603 os << *verbosePrefix <<
"I am no longer in the communicator; "
8604 "returning" << std::endl;
8605 std::cerr << os.str ();
8614 if ((! reverseMode && xferAsImport !=
nullptr) ||
8615 (reverseMode && xferAsExport !=
nullptr)) {
8617 std::ostringstream os;
8618 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8619 std::cerr << os.str ();
8621 #ifdef HAVE_TPETRA_MMM_TIMINGS
8622 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8624 Import_Util::sortCrsEntries (CSR_rowptr(),
8628 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8629 (reverseMode && xferAsImport !=
nullptr)) {
8631 std::ostringstream os;
8632 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8634 std::cerr << os.str();
8636 #ifdef HAVE_TPETRA_MMM_TIMINGS
8637 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8639 Import_Util::sortAndMergeCrsEntries (CSR_rowptr(),
8642 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8643 CSR_colind_LID.resize (CSR_rowptr[N]);
8644 CSR_vals.resize (CSR_rowptr[N]);
8648 TEUCHOS_TEST_FOR_EXCEPTION(
8649 true, std::logic_error,
"Tpetra::CrsMatrix::"
8650 "transferAndFillComplete: Should never get here! "
8651 "Please report this bug to a Tpetra developer.");
8658 std::ostringstream os;
8659 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8660 std::cerr << os.str ();
8669 #ifdef HAVE_TPETRA_MMM_TIMINGS
8670 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8672 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8684 destMat->numImportPacketsPerLID_.modify_host();
8686 # ifdef HAVE_TPETRA_MMM_TIMINGS
8687 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8689 ArrayRCP<size_t> CSR_rowptr;
8690 ArrayRCP<GO> CSR_colind_GID;
8691 ArrayRCP<LO> CSR_colind_LID;
8692 ArrayRCP<Scalar> CSR_vals;
8694 destMat->imports_.sync_device ();
8695 destMat->numImportPacketsPerLID_.sync_device ();
8697 size_t N = BaseRowMap->getLocalNumElements ();
8699 auto RemoteLIDs_d = RemoteLIDs.view_device();
8700 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8701 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8703 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8704 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8705 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8706 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8707 Kokkos::View<int*,device_type> TargetPids_d;
8712 destMat->imports_.view_device(),
8713 destMat->numImportPacketsPerLID_.view_device(),
8725 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8727 #ifdef HAVE_TPETRA_MMM_TIMINGS
8728 tmCopySPRdata = Teuchos::null;
8737 std::ostringstream os;
8738 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8740 std::cerr << os.str ();
8743 #ifdef HAVE_TPETRA_MMM_TIMINGS
8744 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8746 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8756 std::ostringstream os;
8757 os << *verbosePrefix <<
"restrictComm="
8758 << (restrictComm ?
"true" :
"false") << std::endl;
8759 std::cerr << os.str ();
8766 #ifdef HAVE_TPETRA_MMM_TIMINGS
8767 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8770 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8772 MyColMap->replaceCommWithSubset (ReducedComm);
8773 MyColMap = ReducedColMap;
8778 std::ostringstream os;
8779 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8780 std::cerr << os.str ();
8782 destMat->replaceColMap (MyColMap);
8789 if (ReducedComm.is_null ()) {
8791 std::ostringstream os;
8792 os << *verbosePrefix <<
"I am no longer in the communicator; "
8793 "returning" << std::endl;
8794 std::cerr << os.str ();
8804 if ((! reverseMode && xferAsImport !=
nullptr) ||
8805 (reverseMode && xferAsExport !=
nullptr)) {
8807 std::ostringstream os;
8808 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8809 std::cerr << os.str ();
8811 #ifdef HAVE_TPETRA_MMM_TIMINGS
8812 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8814 Import_Util::sortCrsEntries (CSR_rowptr_d,
8818 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8819 (reverseMode && xferAsImport !=
nullptr)) {
8821 std::ostringstream os;
8822 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8824 std::cerr << os.str();
8826 #ifdef HAVE_TPETRA_MMM_TIMINGS
8827 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8829 Import_Util::sortAndMergeCrsEntries (CSR_rowptr_d,
8834 TEUCHOS_TEST_FOR_EXCEPTION(
8835 true, std::logic_error,
"Tpetra::CrsMatrix::"
8836 "transferAndFillComplete: Should never get here! "
8837 "Please report this bug to a Tpetra developer.");
8845 std::ostringstream os;
8846 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8847 std::cerr << os.str ();
8851 #ifdef HAVE_TPETRA_MMM_TIMINGS
8852 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8854 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8862 #ifdef HAVE_TPETRA_MMM_TIMINGS
8863 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8866 Teuchos::ParameterList esfc_params;
8868 RCP<import_type> MyImport;
8871 if (iallreduceRequest.get () !=
nullptr) {
8873 std::ostringstream os;
8874 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8876 std::cerr << os.str ();
8878 iallreduceRequest->wait ();
8879 if (reduced_mismatch != 0) {
8885 #ifdef HAVE_TPETRA_MMM_TIMINGS
8886 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8891 std::ostringstream os;
8892 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8893 std::cerr << os.str ();
8896 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8897 Teuchos::ArrayRCP<int> type3PIDs;
8898 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8899 auto colind = getCrsGraph()->getLocalIndicesHost();
8902 std::ostringstream os;
8903 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8904 std::cerr << os.str ();
8908 #ifdef HAVE_TPETRA_MMM_TIMINGS
8909 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8911 Import_Util::reverseNeighborDiscovery(*
this,
8923 std::ostringstream os;
8924 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8925 std::cerr << os.str ();
8928 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8929 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8931 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8932 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8934 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8936 std::vector<bool> IsOwned(numCols,
true);
8937 std::vector<int> SentTo(numCols,-1);
8938 if (! MyImporter.is_null ()) {
8939 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8940 IsOwned[rlid]=
false;
8944 std::vector<std::pair<int,GO> > usrtg;
8945 usrtg.reserve(TEPID2.size());
8948 const auto& colMap = * (this->getColMap ());
8950 const LO row = TELID2[i];
8951 const int pid = TEPID2[i];
8952 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8953 const int col = colind[j];
8954 if (IsOwned[col] && SentTo[col] != pid) {
8956 GO gid = colMap.getGlobalElement (col);
8957 usrtg.push_back (std::pair<int,GO> (pid, gid));
8965 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8967 usrtg.erase(eopg,usrtg.end());
8970 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8971 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8974 for(
auto && p : usrtg) {
8975 EPID2[pos]= p.first;
8976 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8980 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8981 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8982 GO InfGID = std::numeric_limits<GO>::max();
8983 int InfPID = INT_MAX;
8986 #endif // TPETRA_MIN3
8987 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
8988 int i1=0, i2=0, i3=0;
8989 int Len1 = EPID1.size();
8990 int Len2 = EPID2.size();
8991 int Len3 = EPID3.size();
8993 int MyLen=Len1+Len2+Len3;
8994 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
8995 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
8998 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
8999 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9000 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9001 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9003 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9004 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9005 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9007 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9008 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9011 #endif // TPETRA_MIN3
9012 bool added_entry=
false;
9014 if(PID1 == MIN_PID && GID1 == MIN_GID){
9015 userExportLIDs[iloc]=ELID1[i1];
9016 userExportPIDs[iloc]=EPID1[i1];
9021 if(PID2 == MIN_PID && GID2 == MIN_GID){
9023 userExportLIDs[iloc]=ELID2[i2];
9024 userExportPIDs[iloc]=EPID2[i2];
9030 if(PID3 == MIN_PID && GID3 == MIN_GID){
9032 userExportLIDs[iloc]=ELID3[i3];
9033 userExportPIDs[iloc]=EPID3[i3];
9041 std::ostringstream os;
9042 os << *verbosePrefix <<
"Create Import" << std::endl;
9043 std::cerr << os.str ();
9046 #ifdef HAVE_TPETRA_MMM_TIMINGS
9047 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9049 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9051 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9052 MyImport = rcp (
new import_type (MyDomainMap,
9055 userExportLIDs.view(0,iloc).getConst(),
9056 userExportPIDs.view(0,iloc).getConst(),
9061 std::ostringstream os;
9062 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9063 std::cerr << os.str ();
9067 #ifdef HAVE_TPETRA_MMM_TIMINGS
9068 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9069 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9071 if(!params.is_null())
9072 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9073 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9079 #ifdef HAVE_TPETRA_MMM_TIMINGS
9080 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9083 std::ostringstream os;
9084 os << *verbosePrefix <<
"Create Import" << std::endl;
9085 std::cerr << os.str ();
9088 #ifdef HAVE_TPETRA_MMM_TIMINGS
9089 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9091 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9092 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9093 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9094 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9097 std::ostringstream os;
9098 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9099 std::cerr << os.str ();
9102 #ifdef HAVE_TPETRA_MMM_TIMINGS
9103 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9104 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9106 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9109 if (!params.is_null ()) {
9110 esfc_params.set (
"compute global constants",
9111 params->get (
"compute global constants",
true));
9113 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9114 MyImport, Teuchos::null,
9115 rcp (
new Teuchos::ParameterList (esfc_params)));
9118 #ifdef HAVE_TPETRA_MMM_TIMINGS
9119 tmIESFC = Teuchos::null;
9123 std::ostringstream os;
9124 os << *verbosePrefix <<
"Done" << endl;
9125 std::cerr << os.str ();
// Thin forwarding wrapper: hands destMatrix, importer, domainMap, rangeMap and
// params straight to transferAndFillComplete, passing Teuchos::null as the
// third argument (no separate domain transfer object is supplied here).
// NOTE(review): the lines carrying the function name and the first parameters
// (destMatrix, importer) and the braces are absent from this listing;
// presumably this is the single-Import importAndFillComplete overload --
// confirm against the real header before relying on this comment.
9130 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9135 const Teuchos::RCP<const map_type>& domainMap,
9136 const Teuchos::RCP<const map_type>& rangeMap,
9137 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9139 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// Thin forwarding wrapper: passes rowImporter as the row transfer and wraps
// domainImporter with Teuchos::rcpFromRef before handing everything to
// transferAndFillComplete together with domainMap, rangeMap and params.
// NOTE(review): rcpFromRef is documented by Teuchos as producing a non-owning
// RCP, so domainImporter presumably has to outlive the call -- confirm.
// NOTE(review): the function-name line, the leading parameters and the braces
// are absent from this listing; presumably the two-Import
// importAndFillComplete overload -- confirm against the real header.
9142 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9148 const Teuchos::RCP<const map_type>& domainMap,
9149 const Teuchos::RCP<const map_type>& rangeMap,
9150 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9152 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// Thin forwarding wrapper: hands destMatrix, exporter, domainMap, rangeMap and
// params straight to transferAndFillComplete, passing Teuchos::null as the
// third argument (no separate domain transfer object is supplied here).
// NOTE(review): the lines carrying the function name and the first parameters
// (destMatrix, exporter) and the braces are absent from this listing;
// presumably this is the single-Export exportAndFillComplete overload --
// confirm against the real header before relying on this comment.
9155 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9160 const Teuchos::RCP<const map_type>& domainMap,
9161 const Teuchos::RCP<const map_type>& rangeMap,
9162 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9164 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// Thin forwarding wrapper: passes rowExporter as the row transfer and wraps
// domainExporter with Teuchos::rcpFromRef before handing everything to
// transferAndFillComplete together with domainMap, rangeMap and params.
// NOTE(review): rcpFromRef is documented by Teuchos as producing a non-owning
// RCP, so domainExporter presumably has to outlive the call -- confirm.
// NOTE(review): the function-name line, the leading parameters and the braces
// are absent from this listing; presumably the two-Export
// exportAndFillComplete overload -- confirm against the real header.
9167 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9173 const Teuchos::RCP<const map_type>& domainMap,
9174 const Teuchos::RCP<const map_type>& rangeMap,
9175 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9177 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Explicit-instantiation macro: expands to
// "template class CrsMatrix<SCALAR, LO, GO, NODE>;" for the given type tuple.
// NOTE(review): one continuation line of the original macro is absent from
// this listing (the embedded numbering skips a line); verify against the
// real header.
9188 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9190 template class CrsMatrix< SCALAR , LO , GO , NODE >;
// Explicit-instantiation macro for the member template convert<SO>() of
// CrsMatrix<SI, LO, GO, NODE>; the instantiated member is const and returns
// Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>>.
// SO is the output scalar type, SI the scalar type of the matrix being
// converted (as the expansion shows).
// NOTE(review): one continuation line of the original macro is absent from
// this listing; verify against the real header.
9192 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9194 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9195 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
// Instantiation macro for the nonmember importAndFillCompleteCrsMatrix taking
// a single Import: parameters are (sourceMatrix, importer, domainMap,
// rangeMap, params) and the result is Teuchos::RCP<CrsMatrix<SCALAR, LO, GO,
// NODE>>. The Import and Map template arguments are spelled through
// CrsMatrix<...>::local_ordinal_type / global_ordinal_type / node_type.
// NOTE(review): the first continuation line of the macro body (between the
// #define line and the return type) is absent from this listing -- confirm
// whether it reads "template" or "template<>" in the real header.
9197 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9199 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9200 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9201 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9202 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9203 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9204 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9205 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9206 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9207 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9208 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9209 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9210 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Instantiation macro for the nonmember importAndFillCompleteCrsMatrix taking
// two Imports: parameters are (sourceMatrix, rowImporter, domainImporter,
// domainMap, rangeMap, params) and the result is
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the first continuation line of the macro body (between the
// #define line and the return type) is absent from this listing -- confirm
// whether it reads "template" or "template<>" in the real header.
9212 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9214 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9215 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9216 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9217 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9218 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9219 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9220 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9221 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9222 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9223 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9224 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9225 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9226 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9227 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9228 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Instantiation macro for the nonmember exportAndFillCompleteCrsMatrix taking
// a single Export: parameters are (sourceMatrix, exporter, domainMap,
// rangeMap, params) and the result is Teuchos::RCP<CrsMatrix<SCALAR, LO, GO,
// NODE>>. Mirrors the single-Import macro, with Export in place of Import.
// NOTE(review): the first continuation line of the macro body (between the
// #define line and the return type) is absent from this listing -- confirm
// whether it reads "template" or "template<>" in the real header.
9231 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9233 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9234 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9235 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9236 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9237 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9238 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9239 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9240 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9241 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9242 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9243 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9244 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Instantiation macro for the nonmember exportAndFillCompleteCrsMatrix taking
// two Exports: parameters are (sourceMatrix, rowExporter, domainExporter,
// domainMap, rangeMap, params) and the result is
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE>>.
// NOTE(review): the first continuation line of the macro body (between the
// #define line and the return type) is absent from this listing -- confirm
// whether it reads "template" or "template<>" in the real header.
9246 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9248 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9249 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9250 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9251 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9252 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9253 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9254 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9255 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9256 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9257 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9258 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9259 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9260 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9261 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9262 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella instantiation macro: for one (SCALAR, LO, GO, NODE) tuple it
// expands, in this order, the class instantiation macro, the single-Import
// and single-Export fill-complete macros, and then their two-transfer (_TWO)
// variants. TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion
// and has to be invoked separately.
9265 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9266 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9267 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9268 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9269 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9270 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9272 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
mag_type getNormInf() const
Compute and return the infinity norm of the matrix.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
mag_type getNorm1(bool assumeSymmetric=false) const
Compute and return the 1-norm of the matrix.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
Declaration of Tpetra::Details::EquilibrationInfo.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror num_row_entries_type
Row offsets for "1-D" storage.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
Declaration of Tpetra::computeRowAndColumnOneNorms.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.