10 #ifndef TPETRA_CRSMATRIX_DEF_HPP
11 #define TPETRA_CRSMATRIX_DEF_HPP
23 #include "Tpetra_RowMatrix.hpp"
24 #include "Tpetra_LocalCrsMatrixOperator.hpp"
25 #include "Tpetra_computeRowAndColumnOneNorms.hpp"
34 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
42 #include "Tpetra_Details_packCrsMatrix.hpp"
43 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
45 #include "Teuchos_FancyOStream.hpp"
46 #include "Teuchos_RCP.hpp"
47 #include "Teuchos_DataAccess.hpp"
48 #include "Teuchos_SerialDenseMatrix.hpp"
49 #include "KokkosBlas1_scal.hpp"
50 #include "KokkosSparse_getDiagCopy.hpp"
51 #include "KokkosSparse_spmv.hpp"
// Atomically combine the value stored at `dest` with an input value by
// applying a caller-supplied binary function and installing the result with
// a compare-exchange, retrying until the exchange observes the assumed value.
//
// NOTE(review): this extract omits the remaining parameters, the local
// declarations, and the opening of the loop; `f`, `inputVal`, `assume`, and
// `oldVal` are presumably introduced on lines not shown -- confirm against
// the full file.
64 template<
class T,
class BinaryFunction>
65 T atomic_binary_function_update (T*
const dest,
// Candidate result computed from the value assumed to be stored at dest.
79 T newVal = f (assume, inputVal);
// Attempt to install newVal; the result is kept in oldVal and compared with
// `assume` in the loop condition below.
80 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
81 }
// Loop again whenever the value seen by the exchange was not the assumed one.
while (assume != oldVal);
// Returns the larger of the magnitudes of x and y, where magnitudes are taken
// with Teuchos::ScalarTraits<Scalar>::magnitude.
//
// NOTE(review): the enclosing declarator (its name and the declarations of
// x and y) is not visible in this extract.
101 template<
class Scalar>
105 typedef Teuchos::ScalarTraits<Scalar> STS;
106 return std::max (STS::magnitude (x), STS::magnitude (y));
// CrsMatrix constructor taking a row Map, a size_t maxNumEntriesPerRow, and a
// ParameterList RCP.
//
// Visible behavior: a new crs_graph_type is created from
// (rowMap, maxNumEntriesPerRow, ...). A std::exception thrown while doing so
// is reported through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as
// std::runtime_error, with a message naming the CrsGraph constructor.
// Finally staticGraph_ is set from myGraph_.
//
// NOTE(review): this extract omits lines (member initializers, the opening of
// the try block, the tail of the error message, and presumably the assignment
// of `graph` to myGraph_); confirm against the full file.
115 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
116 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
117 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
118 size_t maxNumEntriesPerRow,
119 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
122 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
123 "[, RCP<ParameterList>]): ";
124 Teuchos::RCP<crs_graph_type> graph;
126 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
129 catch (std::exception& e) {
130 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
131 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
132 "size_t [, RCP<ParameterList>]) threw an exception: "
// The static graph handle refers to the same graph as myGraph_.
139 staticGraph_ = myGraph_;
// CrsMatrix constructor whose visible parameters are an
// ArrayView<const size_t> numEntPerRowToAlloc and a ParameterList RCP.
//
// Visible behavior: a std::exception caught around graph creation is reported
// through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as std::runtime_error, and
// staticGraph_ is set to the locally created `graph`.
//
// NOTE(review): the first parameter, the member initializers, and the
// statement that creates `graph` are not visible in this extract.
144 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
147 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
148 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
151 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
152 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
153 Teuchos::RCP<crs_graph_type> graph;
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
159 catch (std::exception& e) {
160 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
161 (
true, std::runtime_error,
"CrsGraph constructor "
162 "(RCP<const Map>, ArrayView<const size_t>"
163 "[, RCP<ParameterList>]) threw an exception: "
170 staticGraph_ = graph;
// CrsMatrix constructor whose visible parameters are a column Map, a size_t
// maxNumEntPerRow, and a ParameterList RCP.
//
// Visible behavior: checks (std::logic_error) that staticGraph_ and myGraph_
// are both null on entry, reports a std::exception caught around graph
// creation as std::runtime_error including e.what(), and sets staticGraph_
// from myGraph_.
//
// NOTE(review): the first parameter, the member initializers, the statement
// creating `graph`, and presumably the assignment to myGraph_ are not visible
// in this extract.
175 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
178 const Teuchos::RCP<const map_type>& colMap,
179 const size_t maxNumEntPerRow,
180 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
183 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
184 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
185 const char suffix[] =
186 " Please report this bug to the Tpetra developers.";
// Internal consistency checks: neither graph handle may be set yet.
189 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
190 (! staticGraph_.is_null (), std::logic_error,
191 "staticGraph_ is not null at the beginning of the constructor."
193 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
194 (! myGraph_.is_null (), std::logic_error,
195 "myGraph_ is not null at the beginning of the constructor."
197 Teuchos::RCP<crs_graph_type> graph;
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
203 catch (std::exception& e) {
204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
205 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
206 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
207 "exception: " << e.what ());
// The static graph handle refers to the same graph as myGraph_.
213 staticGraph_ = myGraph_;
// CrsMatrix constructor whose visible parameters are a column Map, an
// ArrayView<const size_t> numEntPerRowToAlloc, and a ParameterList RCP.
//
// Visible behavior: a std::exception caught around graph creation is reported
// as std::runtime_error including e.what(), and staticGraph_ is set to the
// locally created `graph`.
//
// NOTE(review): the first parameter, the member initializers, and the
// statement that creates `graph` are not visible in this extract.
218 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
221 const Teuchos::RCP<const map_type>& colMap,
222 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
223 const Teuchos::RCP<Teuchos::ParameterList>& params) :
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
226 const char tfecfFuncName[] =
227 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
228 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
229 Teuchos::RCP<crs_graph_type> graph;
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
235 catch (std::exception& e) {
236 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
237 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
238 "RCP<const Map>, ArrayView<const size_t>[, "
239 "RCP<ParameterList>]) threw an exception: " << e.what ());
245 staticGraph_ = graph;
// CrsMatrix constructor taking an existing (const) CrsGraph and an unnamed
// ParameterList RCP.
//
// Visible behavior:
//  - staticGraph_ is initialized with `graph` and storageStatus_ with
//    Details::STORAGE_1D_PACKED.
//  - Throws std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if
//    `graph` is null or is not fill complete.
//  - A values_type named "Tpetra::CrsMatrix::values" is allocated with
//    graph->lclIndsPacked_wdv.extent(0) entries.
//  - valuesUnpacked_wdv is assigned from valuesPacked_wdv.
//  - Trace messages ("Start", "Allocate values: ...", "Done") are written to
//    std::cerr.
//
// NOTE(review): the trace output is presumably guarded by a verbosity flag,
// and `val` is presumably wrapped into valuesPacked_wdv, on lines omitted
// from this extract -- confirm against the full file.
251 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
253 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
254 const Teuchos::RCP<Teuchos::ParameterList>& ) :
256 staticGraph_ (graph),
257 storageStatus_ (Details::STORAGE_1D_PACKED)
260 typedef typename local_matrix_device_type::values_type values_type;
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
261 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
262 "RCP<ParameterList>]): ";
// Prefix string for the trace messages written to std::cerr.
265 std::unique_ptr<std::string> prefix;
267 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
268 std::ostringstream os;
269 os << *prefix <<
"Start" << endl;
270 std::cerr << os.str ();
// Argument validation: the graph must exist and be fill complete.
273 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
274 (graph.is_null (), std::runtime_error,
"Input graph is null.");
275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
276 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
277 "is not fill complete. You must call fillComplete on the "
278 "graph before using it to construct a CrsMatrix. Note that "
279 "calling resumeFill on the graph makes it not fill complete, "
280 "even if you had previously called fillComplete. In that "
281 "case, you must call fillComplete on the graph again.");
// One value slot per entry of the graph's packed local index array.
289 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
291 std::ostringstream os;
292 os << *prefix <<
"Allocate values: " << numEnt << endl;
293 std::cerr << os.str ();
296 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
// Packed and unpacked value wrappers are made to refer to the same object.
298 valuesUnpacked_wdv = valuesPacked_wdv;
303 std::ostringstream os;
304 os << *prefix <<
"Done" << endl;
305 std::cerr << os.str ();
// CrsMatrix constructor that builds a matrix on `graph` using storage taken
// from another object named `matrix`.
//
// Visible behavior:
//  - staticGraph_ is initialized with `graph`; storageStatus_ is copied from
//    matrix.storageStatus_.
//  - Throws std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if
//    `graph` is null or is not fill complete.
//  - valuesPacked_wdv / valuesUnpacked_wdv are constructed from the
//    corresponding wrappers of `matrix`, with arguments 0 and the extent of
//    the graph's packed / unpacked local index arrays.
//
// NOTE(review): the declaration of `matrix` is not visible in this extract.
// NOTE(review): tfecfFuncName below mentions
// "local_matrix_device_type::values_type", but the visible body never uses a
// values argument and reads from `matrix` instead; the label looks like a
// leftover from the (graph, values, params) constructor -- confirm against
// the signature.
309 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
312 const Teuchos::RCP<const crs_graph_type>& graph,
313 const Teuchos::RCP<Teuchos::ParameterList>& params) :
315 staticGraph_ (graph),
316 storageStatus_ (matrix.storageStatus_)
318 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
319 "local_matrix_device_type::values_type, "
320 "[,RCP<ParameterList>]): ";
// Argument validation: the graph must exist and be fill complete.
321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
322 (graph.is_null (), std::runtime_error,
"Input graph is null.");
323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
324 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
325 "is not fill complete. You must call fillComplete on the "
326 "graph before using it to construct a CrsMatrix. Note that "
327 "calling resumeFill on the graph makes it not fill complete, "
328 "even if you had previously called fillComplete. In that "
329 "case, you must call fillComplete on the graph again.");
// Size each values wrapper to match the graph's index array of the same kind.
// NOTE(review): presumably values_wdv_type(src, 0, n) refers to the first n
// entries of src -- confirm the constructor's semantics.
331 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
332 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
334 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
335 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
// CrsMatrix constructor taking an existing (const) CrsGraph, a device
// values_type `values`, and an unnamed ParameterList RCP.
//
// Visible behavior:
//  - staticGraph_ is initialized with `graph` and storageStatus_ with
//    Details::STORAGE_1D_PACKED.
//  - Throws std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC) if
//    `graph` is null or is not fill complete.
//  - valuesUnpacked_wdv is assigned from valuesPacked_wdv.
//
// NOTE(review): the statement that stores `values` (presumably into
// valuesPacked_wdv) is not visible in this extract.
341 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
343 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
344 const typename local_matrix_device_type::values_type& values,
345 const Teuchos::RCP<Teuchos::ParameterList>& ) :
347 staticGraph_ (graph),
348 storageStatus_ (Details::STORAGE_1D_PACKED)
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
350 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
351 "local_matrix_device_type::values_type, "
352 "[,RCP<ParameterList>]): ";
// Argument validation: the graph must exist and be fill complete.
353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
354 (graph.is_null (), std::runtime_error,
"Input graph is null.");
355 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
356 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
357 "is not fill complete. You must call fillComplete on the "
358 "graph before using it to construct a CrsMatrix. Note that "
359 "calling resumeFill on the graph makes it not fill complete, "
360 "even if you had previously called fillComplete. In that "
361 "case, you must call fillComplete on the graph again.");
// Packed and unpacked value wrappers are made to refer to the same object.
370 valuesUnpacked_wdv = valuesPacked_wdv;
// CrsMatrix constructor from raw CRS arrays held in Kokkos views: visible
// parameters are colMap, rowPointers, columnIndices, values, and params.
//
// Visible behavior:
//  - storageStatus_ is initialized with Details::STORAGE_1D_PACKED.
//  - std::invalid_argument if values.extent(0) != columnIndices.extent(0).
//  - When `debug` is set and rowPointers is nonempty, the last entry of
//    rowPointers (read with getEntryOnHost) must equal the extents of both
//    columnIndices and values, else std::invalid_argument.
//  - A crs_graph_type is created from (rowMap, colMap, rowPointers,
//    columnIndices, params); a std::exception from that is reported as
//    std::runtime_error.
//  - std::logic_error if the graph's local row_map/entries extents disagree
//    with the inputs.
//  - staticGraph_ is set to the new graph and valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv.
//  - Trace messages ("Start", "Done") are written to std::cerr.
//
// NOTE(review): the first parameter (presumably rowMap), the definitions of
// `debug`/`verbose`, and the statement that stores `values` are not visible
// in this extract.
// NOTE(review): `suffix` here starts with ". "; the first std::logic_error
// message below already ends in "." before `<< suffix`, so the emitted text
// contains "..". Consider dropping one of the two periods.
381 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
384 const Teuchos::RCP<const map_type>& colMap,
385 const typename local_graph_device_type::row_map_type& rowPointers,
386 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
387 const typename local_matrix_device_type::values_type& values,
388 const Teuchos::RCP<Teuchos::ParameterList>& params) :
390 storageStatus_ (Details::STORAGE_1D_PACKED)
392 using Details::getEntryOnHost;
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
395 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
396 "RCP<const Map>, ptr, ind, val[, params]): ";
397 const char suffix[] =
398 ". Please report this bug to the Tpetra developers.";
// Prefix string for the trace messages written to std::cerr.
402 std::unique_ptr<std::string> prefix;
404 prefix = this->createPrefix(
405 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
406 std::ostringstream os;
407 os << *prefix <<
"Start" << endl;
408 std::cerr << os.str ();
// values and columnIndices must have the same number of entries.
415 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
416 (values.extent(0) != columnIndices.extent(0),
417 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
418 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
// Debug-only check: the last row pointer is the total entry count and must
// match both array lengths.
420 if (debug && rowPointers.extent(0) != 0) {
421 const size_t numEnt =
422 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
423 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
424 (numEnt !=
size_t(columnIndices.extent(0)) ||
425 numEnt !=
size_t(values.extent(0)),
426 std::invalid_argument,
"Last entry of rowPointers says that "
427 "the matrix has " << numEnt <<
" entr"
428 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
429 "columnIndices and values don't match this. "
430 "columnIndices.extent(0)=" << columnIndices.extent (0)
431 <<
" and values.extent(0)=" << values.extent (0) <<
".");
434 RCP<crs_graph_type> graph;
436 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
437 columnIndices, params));
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
439 catch (std::exception& e) {
440 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
441 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
442 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
// Sanity-check the graph's local structure against the caller's arrays.
450 auto lclGraph = graph->getLocalGraphDevice ();
451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
452 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
453 lclGraph.entries.extent (0) != columnIndices.extent (0),
454 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
455 "ind[, params]) did not set the local graph correctly." << suffix);
456 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
457 (lclGraph.entries.extent (0) != values.extent (0),
458 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
459 "params]) did not set the local graph correctly. "
460 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
461 <<
" != values.extent(0) = " << values.extent (0) << suffix);
467 staticGraph_ = graph;
// Packed and unpacked value wrappers are made to refer to the same object.
477 valuesUnpacked_wdv = valuesPacked_wdv;
486 std::ostringstream os;
487 os << *prefix <<
"Done" << endl;
488 std::cerr << os.str();
// CrsMatrix constructor from raw CRS arrays held in Teuchos::ArrayRCP:
// visible parameters are colMap, ptr, ind, val, and params.
//
// Visible behavior:
//  - storageStatus_ is initialized with Details::STORAGE_1D_PACKED.
//  - A std::exception caught around graph creation is reported as
//    std::runtime_error including e.what().
//  - staticGraph_ is set to the new graph.
//  - std::logic_error if the graph's local row_map / entries extents differ
//    from ptr.size() / ind.size().
//  - `val` is reinterpreted as IST and passed to
//    getKokkosViewDeepCopy<device_type>.
//  - valuesUnpacked_wdv is assigned from valuesPacked_wdv.
//
// NOTE(review): the first parameter (presumably rowMap), the statement that
// creates `graph`, and the destination of the getKokkosViewDeepCopy result
// are not visible in this extract.
492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
495 const Teuchos::RCP<const map_type>& colMap,
496 const Teuchos::ArrayRCP<size_t>& ptr,
497 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
498 const Teuchos::ArrayRCP<Scalar>& val,
499 const Teuchos::RCP<Teuchos::ParameterList>& params) :
501 storageStatus_ (Details::STORAGE_1D_PACKED)
503 using Kokkos::Compat::getKokkosViewDeepCopy;
504 using Teuchos::av_reinterpret_cast;
506 using values_type =
typename local_matrix_device_type::values_type;
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
508 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
509 "RCP<const Map>, ptr, ind, val[, params]): ";
511 RCP<crs_graph_type> graph;
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
516 catch (std::exception& e) {
517 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
518 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
519 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
520 "RCP<ParameterList>]) threw an exception: " << e.what ());
526 staticGraph_ = graph;
// Sanity-check the graph's local structure against the caller's arrays.
539 auto lclGraph = staticGraph_->getLocalGraphDevice ();
540 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
541 (
size_t (lclGraph.row_map.extent (0)) !=
size_t (ptr.size ()) ||
542 size_t (lclGraph.entries.extent (0)) !=
size_t (ind.size ()),
543 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
544 "ptr, ind[, params]) did not set the local graph correctly. "
545 "Please report this bug to the Tpetra developers.");
548 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
// Packed and unpacked value wrappers are made to refer to the same object.
550 valuesUnpacked_wdv = valuesPacked_wdv;
// CrsMatrix constructor from a local matrix plus row and column Maps; the
// visible parameters are colMap and params, and the body reads
// lclMatrix.graph.
//
// Visible behavior:
//  - storageStatus_ is initialized with Details::STORAGE_1D_PACKED.
//  - A std::exception caught around graph creation is reported as
//    std::runtime_error including e.what().
//  - std::logic_error if the created graph is not fill complete.
//  - staticGraph_ is set to the new graph and valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv.
//  - Two closing std::logic_error checks whose messages refer to
//    isFillActive() and isFillComplete().
//
// NOTE(review): the remaining parameters, the start of the graph-creation
// statement, and the condition lines of the two closing checks are not
// visible in this extract.
560 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
563 const Teuchos::RCP<const map_type>& colMap,
565 const Teuchos::RCP<Teuchos::ParameterList>& params) :
567 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
570 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
571 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
572 const char suffix[] =
573 " Please report this bug to the Tpetra developers.";
575 Teuchos::RCP<crs_graph_type> graph;
578 lclMatrix.graph, params));
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
580 catch (std::exception& e) {
581 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
582 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
583 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
584 "exception: " << e.what ());
// The graph built from a local graph is expected to be fill complete.
586 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
587 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
588 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
589 "did not produce a fill-complete graph. Please report this bug to the "
590 "Tpetra developers.");
595 staticGraph_ = graph;
// Packed and unpacked value wrappers are made to refer to the same object.
598 valuesUnpacked_wdv = valuesPacked_wdv;
// Post-conditions on the matrix's own fill state.
600 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
602 "At the end of a CrsMatrix constructor that should produce "
603 "a fillComplete matrix, isFillActive() is true." << suffix);
604 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
606 "CrsMatrix constructor that should produce a fillComplete "
607 "matrix, isFillComplete() is false." << suffix);
// CrsMatrix constructor from a local matrix plus row, column, domain, and
// range Maps.
//
// Visible behavior:
//  - storageStatus_ is initialized with Details::STORAGE_1D_PACKED.
//  - A crs_graph_type is created from (lclMatrix.graph, rowMap, colMap,
//    domainMap, rangeMap, params); a std::exception from that is reported as
//    std::runtime_error including e.what().
//  - std::logic_error if the created graph is not fill complete.
//  - staticGraph_ is set to the new graph and valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv.
//  - Two closing std::logic_error checks whose messages refer to
//    isFillActive() and isFillComplete().
//
// NOTE(review): the declaration of `lclMatrix`, the storing of its values,
// and the condition lines of the two closing checks are not visible in this
// extract.
611 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
614 const Teuchos::RCP<const map_type>& rowMap,
615 const Teuchos::RCP<const map_type>& colMap,
616 const Teuchos::RCP<const map_type>& domainMap,
617 const Teuchos::RCP<const map_type>& rangeMap,
618 const Teuchos::RCP<Teuchos::ParameterList>& params) :
620 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
623 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
624 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
625 "local_matrix_device_type[, RCP<ParameterList>]): ";
626 const char suffix[] =
627 " Please report this bug to the Tpetra developers.";
629 Teuchos::RCP<crs_graph_type> graph;
631 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
632 domainMap, rangeMap, params));
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
634 catch (std::exception& e) {
635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
636 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
637 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
638 "RCP<ParameterList>]) threw an exception: " << e.what ());
// The graph built from a local graph is expected to be fill complete.
640 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
641 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
642 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
643 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
644 "not produce a fillComplete graph." << suffix);
649 staticGraph_ = graph;
// Packed and unpacked value wrappers are made to refer to the same object.
652 valuesUnpacked_wdv = valuesPacked_wdv;
// Post-conditions on the matrix's own fill state.
654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
656 "At the end of a CrsMatrix constructor that should produce "
657 "a fillComplete matrix, isFillActive() is true." << suffix);
658 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
660 "CrsMatrix constructor that should produce a fillComplete "
661 "matrix, isFillComplete() is false." << suffix);
// CrsMatrix constructor from a local matrix plus row, column, domain, and
// range Maps and precomputed Import / Export objects.
//
// Visible behavior:
//  - storageStatus_ is initialized with Details::STORAGE_1D_PACKED.
//  - A std::exception caught around graph creation is reported as
//    std::runtime_error including e.what().
//  - std::logic_error if the created graph is not fill complete.
//  - staticGraph_ is set to the new graph and valuesUnpacked_wdv is assigned
//    from valuesPacked_wdv.
//  - Two closing std::logic_error checks whose messages refer to
//    isFillActive() and isFillComplete().
//
// NOTE(review): the local-matrix parameter, most of the graph-creation
// statement (only the "domainMap, rangeMap, importer," argument line is
// shown), and the condition lines of the two closing checks are not visible
// in this extract.
665 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
668 const Teuchos::RCP<const map_type>& rowMap,
669 const Teuchos::RCP<const map_type>& colMap,
670 const Teuchos::RCP<const map_type>& domainMap,
671 const Teuchos::RCP<const map_type>& rangeMap,
672 const Teuchos::RCP<const import_type>& importer,
673 const Teuchos::RCP<const export_type>& exporter,
674 const Teuchos::RCP<Teuchos::ParameterList>& params) :
676 storageStatus_ (Details::STORAGE_1D_PACKED),
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
680 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
681 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
682 const char suffix[] =
683 " Please report this bug to the Tpetra developers.";
685 Teuchos::RCP<crs_graph_type> graph;
688 domainMap, rangeMap, importer,
// Re-report graph construction failures with a message that names the
// CrsGraph constructor.
691 catch (std::exception& e) {
692 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
693 (
true, std::runtime_error,
"CrsGraph constructor "
694 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
695 "params) threw: " << e.what ());
// The graph built from a local graph is expected to be fill complete.
697 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
698 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
699 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
700 "Export, params) did not produce a fill-complete graph. "
701 "Please report this bug to the Tpetra developers.");
706 staticGraph_ = graph;
// Packed and unpacked value wrappers are made to refer to the same object.
709 valuesUnpacked_wdv = valuesPacked_wdv;
// Post-conditions on the matrix's own fill state.
711 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
713 "At the end of a CrsMatrix constructor that should produce "
714 "a fillComplete matrix, isFillActive() is true." << suffix);
715 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
717 "CrsMatrix constructor that should produce a fillComplete "
718 "matrix, isFillComplete() is false." << suffix);
// CrsMatrix constructor that copies or views another matrix `source`,
// selected by the Teuchos::DataAccess argument copyOrView.
//
// Visible behavior:
//  - staticGraph_ is initialized from source.getCrsGraph() and storageStatus_
//    from source.storageStatus_.
//  - An exception with the message "Source graph must be fillComplete()." is
//    raised by a check whose condition is not shown.
//  - copyOrView == Teuchos::Copy: a values_type labelled "val" is allocated
//    without initialization, and valuesUnpacked_wdv is assigned from
//    valuesPacked_wdv.
//  - copyOrView == Teuchos::View: handled by a separate branch (body not
//    shown).
//  - Any other value: std::invalid_argument listing the valid values.
//
// NOTE(review): the declaration of `source`, the size argument and fill of
// `newvals`, and the body of the View branch are not visible in this extract.
722 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
725 const Teuchos::DataAccess copyOrView):
727 staticGraph_ (source.getCrsGraph()),
728 storageStatus_ (source.storageStatus_)
// Prefix used by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC messages below.
730 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
731 "const CrsMatrix&, const Teuchos::DataAccess): ";
732 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
734 "Source graph must be fillComplete().");
736 if (copyOrView == Teuchos::Copy) {
737 using values_type =
typename local_matrix_device_type::values_type;
739 using Kokkos::view_alloc;
740 using Kokkos::WithoutInitializing;
// Allocation is requested without initialization.
741 values_type newvals (view_alloc (
"val", WithoutInitializing),
746 valuesUnpacked_wdv = valuesPacked_wdv;
749 else if (copyOrView == Teuchos::View) {
// Neither Copy nor View: reject the argument.
755 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
756 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
757 "has an invalid value " << copyOrView <<
". Valid values "
758 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
759 "Teuchos::View = " << Teuchos::View <<
".");
// Exchanges state between this matrix and `crs_matrix` member by member using
// std::swap. The members visibly swapped are importMV_, exportMV_,
// staticGraph_, myGraph_, valuesPacked_wdv, valuesUnpacked_wdv, and
// nonlocals_.
//
// NOTE(review): the declarator and some statements (original lines 765-768
// and 775-776) are not visible in this extract, so further members may be
// swapped as well.
764 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
769 std::swap(crs_matrix.
importMV_, this->importMV_);
770 std::swap(crs_matrix.
exportMV_, this->exportMV_);
771 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
772 std::swap(crs_matrix.myGraph_, this->myGraph_);
773 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
774 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
777 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
// Returns the communicator of the underlying graph, i.e.
// getCrsGraphRef().getComm().
// NOTE(review): the member-function declarator is not visible in this extract.
780 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
781 Teuchos::RCP<const Teuchos::Comm<int> >
784 return getCrsGraphRef ().getComm ();
// Returns the matrix's fillComplete_ flag.
// NOTE(review): the member-function declarator is not visible in this extract.
787 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
791 return fillComplete_;
// Returns the negation of the matrix's fillComplete_ flag.
// NOTE(review): the member-function declarator is not visible in this extract.
794 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
798 return ! fillComplete_;
// Forwards to the underlying graph: getCrsGraphRef().isStorageOptimized().
// NOTE(review): the member-function declarator is not visible in this extract.
801 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
805 return this->getCrsGraphRef ().isStorageOptimized ();
// Forwards to the underlying graph: getCrsGraphRef().isLocallyIndexed().
// NOTE(review): the member-function declarator is not visible in this extract.
808 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
812 return getCrsGraphRef ().isLocallyIndexed ();
// Forwards to the underlying graph: getCrsGraphRef().isGloballyIndexed().
// NOTE(review): the member-function declarator is not visible in this extract.
815 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
819 return getCrsGraphRef ().isGloballyIndexed ();
// Forwards to the underlying graph: getCrsGraphRef().hasColMap().
// NOTE(review): the member-function declarator is not visible in this extract.
822 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
826 return getCrsGraphRef ().hasColMap ();
// Forwards to the underlying graph: getCrsGraphRef().getGlobalNumEntries().
// NOTE(review): the member-function declarator is not visible in this extract.
829 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
833 return getCrsGraphRef ().getGlobalNumEntries ();
// Forwards to the underlying graph: getCrsGraphRef().getLocalNumEntries().
// NOTE(review): the member-function declarator is not visible in this extract.
836 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
840 return getCrsGraphRef ().getLocalNumEntries ();
// Forwards to the underlying graph: getCrsGraphRef().getGlobalNumRows().
// NOTE(review): the member-function declarator is not visible in this extract.
843 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
847 return getCrsGraphRef ().getGlobalNumRows ();
// Forwards to the underlying graph: getCrsGraphRef().getGlobalNumCols().
// NOTE(review): the member-function declarator is not visible in this extract.
850 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
854 return getCrsGraphRef ().getGlobalNumCols ();
// Forwards to the underlying graph: getCrsGraphRef().getLocalNumRows().
// NOTE(review): the member-function declarator is not visible in this extract.
857 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
861 return getCrsGraphRef ().getLocalNumRows ();
// Forwards to the underlying graph: getCrsGraphRef().getLocalNumCols().
// NOTE(review): the member-function declarator is not visible in this extract.
865 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
869 return getCrsGraphRef ().getLocalNumCols ();
// Forwards to the underlying graph:
// getCrsGraphRef().getNumEntriesInGlobalRow(globalRow).
// NOTE(review): the member-function declarator (including the declaration of
// globalRow) is not visible in this extract.
873 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
877 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
// Forwards to the underlying graph:
// getCrsGraphRef().getNumEntriesInLocalRow(localRow).
// NOTE(review): the member-function declarator (including the declaration of
// localRow) is not visible in this extract.
880 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
884 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
// Forwards to the underlying graph:
// getCrsGraphRef().getGlobalMaxNumRowEntries().
// NOTE(review): the member-function declarator is not visible in this extract.
887 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
891 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
// Forwards to the underlying graph:
// getCrsGraphRef().getLocalMaxNumRowEntries().
// NOTE(review): the member-function declarator is not visible in this extract.
894 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
898 return getCrsGraphRef ().getLocalMaxNumRowEntries ();
// Returns the index base of the row Map: getRowMap()->getIndexBase().
// NOTE(review): the member-function declarator is not visible in this extract.
901 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
905 return getRowMap ()->getIndexBase ();
// Returns the row Map of the underlying graph: getCrsGraphRef().getRowMap().
// NOTE(review): the member-function declarator is not visible in this extract.
908 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
909 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
912 return getCrsGraphRef ().getRowMap ();
// Returns the column Map of the underlying graph:
// getCrsGraphRef().getColMap().
// NOTE(review): the member-function declarator is not visible in this extract.
915 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
916 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
919 return getCrsGraphRef ().getColMap ();
// Returns the domain Map of the underlying graph:
// getCrsGraphRef().getDomainMap().
// NOTE(review): the member-function declarator is not visible in this extract.
922 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
923 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
926 return getCrsGraphRef ().getDomainMap ();
// Returns the range Map of the underlying graph:
// getCrsGraphRef().getRangeMap().
// NOTE(review): the member-function declarator is not visible in this extract.
929 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
930 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
933 return getCrsGraphRef ().getRangeMap ();
// Accessor returning an RCP<const RowGraph>; the visible code branches on
// whether staticGraph_ is non-null.
// NOTE(review): the declarator and both return statements are not visible in
// this extract; presumably staticGraph_ is returned when it is set and
// myGraph_ otherwise -- confirm against the full file.
936 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
937 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
940 if (staticGraph_ != Teuchos::null) {
// Accessor returning an RCP<const CrsGraph>; the visible code branches on
// whether staticGraph_ is non-null.
// NOTE(review): the declarator and both return statements are not visible in
// this extract; presumably staticGraph_ is returned when it is set and
// myGraph_ otherwise -- confirm against the full file.
946 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
947 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
950 if (staticGraph_ != Teuchos::null) {
// Returns a reference to the matrix's graph: *staticGraph_ when staticGraph_
// is non-null, otherwise *myGraph_.
//
// A std::logic_error check fires when myGraph_ is null on the fallback path
// (message: both graph handles are null).
//
// NOTE(review): the declarator, the `#else` between the two `debug`
// definitions, and whatever guards the null check (presumably `debug`) are
// not visible in this extract.
956 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// `debug` is a compile-time constant selected by HAVE_TPETRA_DEBUG.
961 #ifdef HAVE_TPETRA_DEBUG
962 constexpr
bool debug =
true;
964 constexpr
bool debug =
false;
965 #endif // HAVE_TPETRA_DEBUG
// Prefer the static graph handle when it is set.
967 if (! this->staticGraph_.is_null ()) {
968 return * (this->staticGraph_);
972 const char tfecfFuncName[] =
"getCrsGraphRef: ";
973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
974 (this->myGraph_.is_null (), std::logic_error,
975 "Both staticGraph_ and myGraph_ are null. "
976 "Please report this bug to the Tpetra developers.");
978 return * (this->myGraph_);
// Builds and returns a local_matrix_device_type from the matrix's packed
// values (device view, ReadWrite access) and the static graph's local device
// graph. numCols is the number of local elements of the graph's column Map.
//
// NOTE(review): the declarator and the start of the return statement
// (presumably a label argument and numCols) are not visible in this extract.
982 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
983 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
987 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
990 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
991 staticGraph_->getLocalGraphDevice());
// Builds and returns a local_matrix_host_type labelled
// "Tpetra::CrsMatrix::lclMatrixHost" from numCols, the matrix's packed values
// (host view, ReadWrite access), and the static graph's local host graph.
// numCols is the number of local elements of the graph's column Map.
//
// NOTE(review): the member-function declarator is not visible in this extract.
994 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
995 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
999 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
1000 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
1001 valuesPacked_wdv.getHostView(Access::ReadWrite),
1002 staticGraph_->getLocalGraphHost());
// Returns true exactly when myGraph_ is null.
// NOTE(review): the member-function declarator is not visible in this extract.
1005 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1009 return myGraph_.is_null ();
1012 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1019 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// allocateValues: allocate storage for the matrix's values, sized from the
// static graph's local allocation size.
//
// Inputs used by the visible code: `lg` (compared with LocalIndices), `gas`
// (compared with GraphAlreadyAllocated / GraphNotYetAllocated), and `verbose`
// (forwarded to CrsGraph::allocateIndices).
//
// Visible behavior:
//  - Writes trace messages to std::cerr describing `lg`, `gas`, and the
//    pre/post allocation sizes.
//  - std::logic_error if staticGraph_ is null, if `gas` disagrees with
//    staticGraph_->indicesAreAllocated(), if indices are unallocated but
//    myGraph_ is null, or if the last unpacked row pointer differs from the
//    graph's local allocation size.
//  - When gas == GraphNotYetAllocated, calls myGraph_->allocateIndices(lg,
//    verbose); exceptions from that call are reported as std::runtime_error.
//  - Constructs a values_type labelled "Tpetra::CrsMatrix::values" with
//    lclTotalNumEntries entries.
//
// NOTE(review): the declarator, the guards around the trace output and the
// consistency checks (presumably `verbose` / `debug`), and the assignment
// target of the new values_type are not visible in this extract.
1026 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1035 const char tfecfFuncName[] =
"allocateValues: ";
1036 const char suffix[] =
1037 " Please report this bug to the Tpetra developers.";
1038 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
// Prefix string for the trace messages written to std::cerr.
1040 std::unique_ptr<std::string> prefix;
1042 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1043 std::ostringstream os;
1044 os << *prefix <<
"lg: "
1045 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1047 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1048 <<
"Allocated" << endl;
1049 std::cerr << os.str();
1052 const bool debug = Behavior::debug(
"CrsMatrix");
1054 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1055 (this->staticGraph_.is_null (), std::logic_error,
1056 "staticGraph_ is null." << suffix);
// The caller's claim about graph allocation must match the graph's own state;
// the two checks below pick the message that fits the direction of mismatch.
1061 if ((gas == GraphAlreadyAllocated) !=
1062 staticGraph_->indicesAreAllocated ()) {
1063 const char err1[] =
"The caller has asserted that the graph "
1065 const char err2[] =
"already allocated, but the static graph "
1066 "says that its indices are ";
1067 const char err3[] =
"already allocated. ";
1068 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1069 (gas == GraphAlreadyAllocated &&
1070 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1071 err1 << err2 <<
"not " << err3 << suffix);
1072 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1073 (gas != GraphAlreadyAllocated &&
1074 staticGraph_->indicesAreAllocated (), std::logic_error,
1075 err1 <<
"not " << err2 << err3 << suffix);
// Unallocated indices can only be allocated through a graph the matrix owns.
1083 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1084 (! this->staticGraph_->indicesAreAllocated () &&
1085 this->myGraph_.is_null (), std::logic_error,
1086 "The static graph says that its indices are not allocated, "
1087 "but the graph is not owned by the matrix." << suffix);
1090 if (gas == GraphNotYetAllocated) {
1092 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1093 (this->myGraph_.is_null (), std::logic_error,
1094 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1097 this->myGraph_->allocateIndices (lg, verbose);
// Re-report allocation failures with a message naming the failing call.
1099 catch (std::exception& e) {
1100 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1101 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1102 "threw an exception: " << e.what ());
1105 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1106 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1107 "threw an exception not a subclass of std::exception.");
// Number of value slots to allocate.
1112 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
1114 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
// The final unpacked row pointer must equal the allocation size. The message
// spells the identifier as "rowPtrsUnpacked_host", matching the accessor
// getRowPtrsUnpackedHost() used in the condition.
1115 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1116 (this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1117 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnpacked_host." << suffix);
1121 using values_type =
typename local_matrix_device_type::values_type;
1123 std::ostringstream os;
1124 os << *prefix <<
"Allocate values_wdv: Pre "
1125 << valuesUnpacked_wdv.extent(0) <<
", post "
1126 << lclTotalNumEntries << endl;
1127 std::cerr << os.str();
1131 values_type(
"Tpetra::CrsMatrix::values",
1132 lclTotalNumEntries));
// fillLocalGraphAndMatrix (per its own error prefix, called from
// fillComplete or expertStaticFillComplete).
//
// Works on the matrix-owned graph (myGraph_).  When the graph's local
// entry count differs from its allocation size, packed row offsets,
// packed local column indices and packed values are allocated and filled
// with pack functors; otherwise the packed members are set to the
// unpacked ones.  If optimized storage is requested, the unpacked members
// are then set to the packed ones and both storage statuses become
// STORAGE_1D_PACKED.
//
// NOTE(review): several lines of this definition (declarator, braces, some
// conditions) are not visible in this view; the comments below describe
// only what the visible lines show.
1136 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1142 using ::Tpetra::Details::getEntryOnHost;
1143 using Teuchos::arcp_const_cast;
1144 using Teuchos::Array;
1145 using Teuchos::ArrayRCP;
1146 using Teuchos::null;
1150 using row_map_type =
typename local_graph_device_type::row_map_type;
1151 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1152 using values_type =
typename local_matrix_device_type::values_type;
1154 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
// Prefix and suffix used by every exception message raised below.
1156 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1157 "fillComplete or expertStaticFillComplete): ";
1158 const char suffix[] =
1159 " Please report this bug to the Tpetra developers.";
// Prefix string for the diagnostic lines written to std::cerr.
1163 std::unique_ptr<std::string> prefix;
1165 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1166 std::ostringstream os;
1167 os << *prefix << endl;
1168 std::cerr << os.str ();
// This routine needs a matrix-owned (nonconst) graph.
1174 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1175 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1176 "(myGraph_) is null. This means that the matrix has a "
1177 "const (a.k.a. \"static\") graph. fillComplete or "
1178 "expertStaticFillComplete should never call "
1179 "fillLocalGraphAndMatrix in that case." << suffix);
1182 const size_t lclNumRows = this->getLocalNumRows ();
// Current (unpacked) row offsets, taken from the graph's device view.
1197 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1198 myGraph_->rowPtrsUnpacked_dev_;
// Sanity checks: the offsets array must exist and have lclNumRows + 1
// entries.
1201 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1202 (curRowOffsets.extent (0) == 0, std::logic_error,
1203 "curRowOffsets.extent(0) == 0.");
1204 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1205 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1206 "curRowOffsets.extent(0) = "
1207 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1208 << (lclNumRows + 1) <<
".");
// The last row offset must equal the length of the unpacked column
// index array.  valToCheck is read at index numOffsets - 1, so the
// message reports that index (it previously printed numOffsets).
1209 const size_t numOffsets = curRowOffsets.extent (0);
1210 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1211 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1213 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1214 std::logic_error,
"numOffsets = " <<
1215 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1216 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1217 << (numOffsets - 1) <<
") = " << valToCheck <<
".");
// Unpacked case: the number of entries differs from the allocation
// size, so packed copies of offsets, indices and values are built.
1220 if (myGraph_->getLocalNumEntries() !=
1221 myGraph_->getLocalAllocationSize()) {
1225 typename row_map_type::non_const_type k_ptrs;
1226 row_map_type k_ptrs_const;
1227 lclinds_1d_type k_inds;
1231 std::ostringstream os;
1232 const auto numEnt = myGraph_->getLocalNumEntries();
1233 const auto allocSize = myGraph_->getLocalAllocationSize();
1234 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1235 <<
", allocSize=" << allocSize << endl;
1236 std::cerr << os.str ();
// Debug check before packing: the last unpacked offset must match the
// lengths of both the unpacked values and the unpacked column indices.
1244 if (debug && curRowOffsets.extent (0) != 0) {
1245 const size_t numOffsets =
1246 static_cast<size_t> (curRowOffsets.extent (0));
1247 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1248 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1249 (static_cast<size_t> (valToCheck) !=
1250 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1251 std::logic_error,
"(unpacked branch) Before "
1252 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1253 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1254 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1255 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1256 (static_cast<size_t> (valToCheck) !=
1257 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1258 std::logic_error,
"(unpacked branch) Before "
1259 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1260 <<
") = " << valToCheck
1261 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1262 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
// Total number of entries on this process; assigned below from the
// packed-offset computation (the right-hand side is not visible here).
1270 size_t lclTotalNumEntries = 0;
1276 std::ostringstream os;
1277 os << *prefix <<
"Allocate packed row offsets: "
1278 << (lclNumRows+1) << endl;
1279 std::cerr << os.str ();
// Packed row offsets: lclNumRows + 1 entries.
1281 typename row_map_type::non_const_type
1282 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1283 typename row_entries_type::const_type numRowEnt_h =
1284 myGraph_->k_numRowEntries_;
1287 lclTotalNumEntries =
1291 k_ptrs = packedRowOffsets;
1292 k_ptrs_const = k_ptrs;
// Checks after filling k_ptrs: correct length, and the final offset
// equals the total entry count.
1296 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1297 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1299 "(unpacked branch) After packing k_ptrs, "
1300 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1301 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1302 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1303 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1304 (valToCheck != lclTotalNumEntries, std::logic_error,
1305 "(unpacked branch) After filling k_ptrs, "
1306 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1307 <<
" != total number of entries on the calling process = "
1308 << lclTotalNumEntries <<
".");
// Allocate the packed column indices and the packed values, each of
// length lclTotalNumEntries.
1313 std::ostringstream os;
1314 os << *prefix <<
"Allocate packed local column indices: "
1315 << lclTotalNumEntries << endl;
1316 std::cerr << os.str ();
1318 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1320 std::ostringstream os;
1321 os << *prefix <<
"Allocate packed values: "
1322 << lclTotalNumEntries << endl;
1323 std::cerr << os.str ();
1325 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
// Pack the column indices: a pack_functor is run over the local rows
// with the new offsets (k_ptrs) and the old ones (curRowOffsets).
1337 using inds_packer_type = pack_functor<
1338 typename Graph::local_graph_device_type::entries_type::non_const_type,
1339 typename Graph::local_inds_dualv_type::t_dev::const_type,
1340 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1341 typename Graph::local_graph_device_type::row_map_type>;
1342 inds_packer_type indsPacker (
1344 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1345 k_ptrs, curRowOffsets);
1347 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1348 Kokkos::parallel_for
1349 (
"Tpetra::CrsMatrix pack column indices",
1350 range_type (0, lclNumRows), indsPacker);
// Pack the values the same way.
1354 using vals_packer_type = pack_functor<
1355 typename values_type::non_const_type,
1356 typename values_type::const_type,
1357 typename row_map_type::non_const_type,
1358 typename row_map_type::const_type>;
1359 vals_packer_type valsPacker (
1361 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1362 k_ptrs, curRowOffsets);
1363 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1364 range_type (0, lclNumRows), valsPacker);
// Checks after packing: the last packed offset must match the lengths
// of k_vals and k_inds.
1367 const char myPrefix[] =
"(\"Optimize Storage\""
1368 "=true branch) After packing, ";
1369 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1370 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1371 <<
"k_ptrs.extent(0) = 0. This probably means that "
1372 "rowPtrsUnpacked_ was never allocated.");
1373 if (k_ptrs.extent (0) != 0) {
1374 const size_t numOffsets (k_ptrs.extent (0));
1375 const auto valToCheck =
1376 getEntryOnHost (k_ptrs, numOffsets - 1);
1377 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1378 (
size_t (valToCheck) != k_vals.extent (0),
1379 std::logic_error, myPrefix <<
1380 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1381 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1382 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1383 (
size_t (valToCheck) != k_inds.extent (0),
1384 std::logic_error, myPrefix <<
1385 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1386 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
// Hand the packed offsets (and, on the following partly hidden lines,
// the packed indices) to the graph.
1390 myGraph_->setRowPtrsPacked(k_ptrs_const);
1391 myGraph_->lclIndsPacked_wdv =
// Path whose diagnostic reads "Storage already packed": the packed
// members are simply set to the unpacked ones.
1398 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1399 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1400 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1401 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1402 valuesPacked_wdv = valuesUnpacked_wdv;
1405 std::ostringstream os;
1406 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1407 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1408 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1409 << valuesUnpacked_wdv.extent(0) << endl;
1410 std::cerr << os.str();
// Checks for this path: the last unpacked offset must match the lengths
// of the packed values and packed column indices.
1414 const char myPrefix[] =
1415 "(\"Optimize Storage\"=false branch) ";
1416 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1417 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1418 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1419 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1420 "that rowPtrsUnpacked_ was never allocated.");
1421 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1422 const size_t numOffsets = rowPtrsUnpackedHost.extent (0);
1423 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
1424 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1425 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1426 std::logic_error, myPrefix <<
1427 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1428 <<
" != valuesPacked_wdv.extent(0) = "
1429 << valuesPacked_wdv.extent (0) <<
".");
1430 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1431 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1432 std::logic_error, myPrefix <<
1433 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1434 <<
" != myGraph_->lclIndsPacked.extent(0) = "
1435 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// Final checks on the packed host row offsets: lclNumRows + 1 entries,
// and the last offset matches the packed values / packed indices.
1441 const char myPrefix[] =
"After packing, ";
1442 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1443 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1444 (
size_t (rowPtrsPackedHost.extent (0)) !=
size_t (lclNumRows + 1),
1445 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1446 << rowPtrsPackedHost.extent (0) <<
" != lclNumRows+1 = " <<
1447 (lclNumRows+1) <<
".");
1448 if (rowPtrsPackedHost.extent (0) != 0) {
1449 const size_t numOffsets (rowPtrsPackedHost.extent (0));
1450 const size_t valToCheck = rowPtrsPackedHost(numOffsets-1);
1451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1452 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1453 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1454 (numOffsets-1) <<
") = " << valToCheck
1455 <<
" != valuesPacked_wdv.extent(0) = "
1456 << valuesPacked_wdv.extent (0) <<
".");
// The message names the member that is actually compared and printed,
// lclIndsPacked_wdv (it previously read "lclIndsPacked_wdvk_inds").
1457 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1458 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1459 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1460 (numOffsets-1) <<
") = " << valToCheck
1461 <<
" != myGraph_->lclIndsPacked_wdv.extent(0) = "
1462 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
// "Optimize Storage" defaults to true unless the graph is static and
// not already storage-optimized; a non-null params list may override it.
1470 const bool defaultOptStorage =
1471 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1472 const bool requestOptimizedStorage =
1473 (! params.is_null () &&
1474 params->get (
"Optimize Storage", defaultOptStorage)) ||
1475 (params.is_null () && defaultOptStorage);
// Optimized storage: release k_numRowEntries_, set the unpacked members
// to the packed ones, and mark graph and matrix as STORAGE_1D_PACKED.
1480 if (requestOptimizedStorage) {
1485 std::ostringstream os;
1486 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1487 << myGraph_->k_numRowEntries_.extent(0) << endl;
1488 std::cerr << os.str();
1491 myGraph_->k_numRowEntries_ = row_entries_type ();
1496 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1497 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1498 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1499 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1500 valuesUnpacked_wdv = valuesPacked_wdv;
1502 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1503 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1507 std::ostringstream os;
1508 os << *prefix <<
"User requested NOT to optimize storage"
1510 std::cerr << os.str();
// fillLocalMatrix: builds the packed value storage of this matrix using
// the row structure of staticGraph_.
//
// When the graph's local entry count differs from its allocation size, a
// new value array is allocated and filled by a pack functor; otherwise
// valuesPacked_wdv is set to valuesUnpacked_wdv.  If optimized storage is
// requested, valuesUnpacked_wdv is then set to valuesPacked_wdv and the
// matrix storage status becomes STORAGE_1D_PACKED.
//
// NOTE(review): several lines of this definition are not visible in this
// view; the comments describe only what the visible lines show.
1515 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1520 using ::Tpetra::Details::ProfilingRegion;
1521 using Teuchos::ArrayRCP;
1522 using Teuchos::Array;
1523 using Teuchos::null;
1527 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1528 using non_const_row_map_type =
typename row_map_type::non_const_type;
1529 using values_type =
typename local_matrix_device_type::values_type;
1530 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1531 const size_t lclNumRows = getLocalNumRows();
// Prefix string for the diagnostic lines written to std::cerr.
1534 std::unique_ptr<std::string> prefix;
1536 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1537 std::ostringstream os;
1538 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1539 std::cerr << os.str ();
// Entry count, allocation size and packed row offsets all come from the
// static graph.
1551 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1552 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1553 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1555 row_map_type k_ptrs;
// Optimized storage is requested unless a non-null params list sets
// "Optimize Storage" to false (its default depends on the graph state).
1561 bool requestOptimizedStorage =
true;
1562 const bool default_OptimizeStorage =
1563 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1564 if (! params.is_null() &&
1565 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1566 requestOptimizedStorage =
false;
// A graph that is not storage-optimized prevents the matrix from
// optimizing storage: the request is reported and then cleared.
1573 if (! staticGraph_->isStorageOptimized () &&
1574 requestOptimizedStorage) {
1576 (
true, std::runtime_error,
"You requested optimized storage "
1577 "by setting the \"Optimize Storage\" flag to \"true\" in "
1578 "the ParameterList, or by virtue of default behavior. "
1579 "However, the associated CrsGraph was filled separately and "
1580 "requested not to optimize storage. Therefore, the "
1581 "CrsMatrix cannot optimize storage.");
1582 requestOptimizedStorage =
false;
// Unpacked case: entry count differs from allocation size, so the
// values are packed into a freshly allocated array.
1607 if (nodeNumEntries != nodeNumAllocated) {
1609 std::ostringstream os;
1610 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1611 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1613 std::cerr << os.str();
1618 std::ostringstream os;
1619 os << *prefix <<
"Allocate packed row offsets: "
1620 << (lclNumRows+1) << endl;
1621 std::cerr << os.str();
1623 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
1628 size_t lclTotalNumEntries = 0;
1631 typename row_entries_type::const_type numRowEnt_h =
1632 staticGraph_->k_numRowEntries_;
1634 lclTotalNumEntries =
1641 std::ostringstream os;
1642 os << *prefix <<
"Allocate packed values: "
1643 << lclTotalNumEntries << endl;
1644 std::cerr << os.str ();
1646 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
// The packer reads the unpacked values (read-only device view) and
// writes k_vals, using the new offsets (tmpk_ptrs) and k_rowPtrs.
1650 typename values_type::non_const_type,
1651 typename values_type::const_type,
1652 typename row_map_type::non_const_type,
1653 typename row_map_type::const_type> valsPacker
1654 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1655 tmpk_ptrs, k_rowPtrs);
1658 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1659 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1660 range_type (0, lclNumRows), valsPacker);
// Path whose diagnostic reads "Storage already packed": packed values
// are set to the unpacked values.
1664 valuesPacked_wdv = valuesUnpacked_wdv;
1666 std::ostringstream os;
1667 os << *prefix <<
"Storage already packed: "
1668 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1669 std::cerr << os.str();
// Optimized storage: the unpacked values are set to the packed ones and
// the matrix is marked STORAGE_1D_PACKED.
1674 if (requestOptimizedStorage) {
1677 valuesUnpacked_wdv = valuesPacked_wdv;
1679 this->storageStatus_ = Details::STORAGE_1D_PACKED;
// Inserts new column indices into one row of the graph and copies the
// matching new values into the row's value storage.
//
// NOTE(review): the declarator lines carrying the function name and the
// leading parameters (graph, rowInfo) are not visible in this view; this
// is presumably insertIndicesAndValues -- confirm against the header.
//
// Visible parameters:
//   newInds    - views of the indices to insert.
//   oldRowVals - the row's current value storage (written to).
//   newRowVals - values matching the new indices.
//   lg, I      - ELocalGlobal selectors forwarded to graph.insertIndices.
1683 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1688 const typename crs_graph_type::SLocalGlobalViews& newInds,
1689 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1690 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1691 const ELocalGlobal lg,
1692 const ELocalGlobal I)
// Remember the entry count before insertion: new values are appended
// starting at that offset.
1694 const size_t oldNumEnt = rowInfo.numEntries;
1695 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
// Only touch the value storage when something was inserted; this also
// avoids indexing newRowVals[0] for an empty insertion.
1701 if (numInserted > 0) {
1702 const size_t startOffset = oldNumEnt;
// Raw byte copy of numInserted values into the row, starting at
// startOffset.
1703 memcpy ((
void*) &oldRowVals[startOffset], &newRowVals[0],
1704 numInserted *
sizeof (impl_scalar_type));
// insertLocalValues (ArrayView overload): inserts (local column index,
// value) pairs into an owned local row.
//
// Visible preconditions, each reported by throwing std::runtime_error:
// fill must be active, the graph must not be static, the graph must have
// a column Map, values and indices must have equal size, and lclRow must
// be a local element of the row Map.  In HAVE_TPETRA_DEBUG builds, indices
// that are not in the column Map cause std::invalid_argument.
//
// Two combine-mode paths are visible: one accumulates (+=) into existing
// entries and the INSERT path overwrites (=); any other mode throws
// std::invalid_argument.
//
// NOTE(review): several lines of this definition are not visible in this
// view; the comments describe only what the visible lines show.
1708 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1712 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1713 const Teuchos::ArrayView<const Scalar>& values,
1717 const char tfecfFuncName[] =
"insertLocalValues: ";
1719 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1720 (! this->isFillActive (), std::runtime_error,
1721 "Fill is not active. After calling fillComplete, you must call "
1722 "resumeFill before you may insert entries into the matrix again.");
1723 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1724 (this->isStaticGraph (), std::runtime_error,
1725 "Cannot insert indices with static graph; use replaceLocalValues() "
1729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1730 (graph.
colMap_.is_null (), std::runtime_error,
1731 "Cannot insert local indices without a column map.");
1732 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1734 std::runtime_error,
"Graph indices are global; use "
1735 "insertGlobalValues().");
1736 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1737 (values.size () != indices.size (), std::runtime_error,
1738 "values.size() = " << values.size ()
1739 <<
" != indices.size() = " << indices.size () <<
".");
1740 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1741 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1742 "Local row index " << lclRow <<
" does not belong to this process.");
// First insertion: allocate value storage (and graph indices) for local
// indexing.
1744 if (! graph.indicesAreAllocated ()) {
1748 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
// Debug-only validation: collect the input indices that are not in the
// column Map and report them all in one exception.
1751 #ifdef HAVE_TPETRA_DEBUG
1752 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1757 using Teuchos::toString;
1760 Teuchos::Array<LocalOrdinal> badColInds;
1761 bool allInColMap =
true;
1762 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1764 allInColMap =
false;
1765 badColInds.push_back (indices[k]);
1768 if (! allInColMap) {
1769 std::ostringstream os;
1770 os <<
"You attempted to insert entries in owned row " << lclRow
1771 <<
", at the following column indices: " << toString (indices)
1773 os <<
"Of those, the following indices are not in the column Map on "
1774 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1775 "the matrix has a column Map already, it is invalid to insert "
1776 "entries at those locations.";
1777 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1778 (
true, std::invalid_argument, os.str ());
1781 #endif // HAVE_TPETRA_DEBUG
// Host view of this row's values; the callbacks below write into it at
// the offset supplied by graph.insertLocalIndicesImpl.
1785 auto valsView = this->getValuesViewHostNonConst(rowInfo);
// Accumulating path: add the k-th input value to the stored value.
1787 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1788 valsView[offset] += values[k]; };
1789 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1790 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1791 }
// INSERT path: overwrite the stored value with the k-th input value.
else if (CM ==
INSERT) {
1792 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1793 valsView[offset] = values[k]; };
1794 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1795 graph.insertLocalIndicesImpl(lclRow, indices, cb);
// Any other combine mode is rejected.  The second literal begins with a
// space so the mode name is not glued to the word "but".
1797 std::ostringstream os;
1798 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1799 <<
" but this has not been implemented." << endl;
1800 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1801 (
true, std::invalid_argument, os.str ());
// insertLocalValues (raw-array overload): wraps the raw arrays in
// non-owning Teuchos::ArrayView objects of length numEnt and forwards to
// the ArrayView overload.
//
// NOTE(review): the declarator lines (function name, row argument and the
// CombineMode parameter) are not visible in this view.
//
// Visible parameters:
//   numEnt - number of entries in vals and cols.
//   vals   - values to insert.
//   cols   - local column indices, parallel to vals.
1805 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1809 const LocalOrdinal numEnt,
1810 const Scalar vals[],
1811 const LocalOrdinal cols[],
// The views do not copy; they reference the caller's arrays.
1814 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1815 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1816 this->insertLocalValues (localRow, colsT, valsT, CM);
// insertGlobalValuesImpl: inserts numInputEnt (global column index, value)
// pairs into one row of the graph and accumulates the values into the
// row's value storage.
//
// In HAVE_TPETRA_DEBUG builds the row's entry count after insertion is
// checked against the count before insertion plus the number inserted; a
// mismatch builds a detailed report and throws std::logic_error.
//
// NOTE(review): several lines of this definition (declarator, the actual
// insertion call, some braces) are not visible in this view; the comments
// describe only what the visible lines show.
1819 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1824 const GlobalOrdinal gblColInds[],
1826 const size_t numInputEnt)
1828 #ifdef HAVE_TPETRA_DEBUG
1829 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
// Entry count before insertion, used by the debug check below.
1831 const size_t curNumEnt = rowInfo.numEntries;
1832 #endif // HAVE_TPETRA_DEBUG
// First insertion: allocate value storage for global indexing.
1834 if (! graph.indicesAreAllocated ()) {
1837 using ::Tpetra::Details::Behavior;
1838 const bool verbose = Behavior::verbose(
"CrsMatrix");
1839 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
// Refresh rowInfo after allocation.
1844 rowInfo = graph.
getRowInfo (rowInfo.localRow);
// Callback: add the k-th input value to the stored value at offset.
1847 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1848 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1849 valsView[offset] += vals[k];
1851 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1852 #ifdef HAVE_TPETRA_DEBUG
// Debug check: expected entry count versus the count reported by the
// graph (the right-hand side of chkNewNumEnt is not visible here).
1858 #ifdef HAVE_TPETRA_DEBUG
1859 size_t newNumEnt = curNumEnt + numInserted;
1860 const size_t chkNewNumEnt =
1862 if (chkNewNumEnt != newNumEnt) {
// Build the failure report: counts, then the input column indices.
1863 std::ostringstream os;
1864 os << std::endl <<
"newNumEnt = " << newNumEnt
1865 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1866 <<
") = " << chkNewNumEnt <<
"." << std::endl
1867 <<
"\torigNumEnt: " << origNumEnt << std::endl
1868 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1869 <<
"\tgblColInds: [";
1870 for (
size_t k = 0; k < numInputEnt; ++k) {
1871 os << gblColInds[k];
1872 if (k +
size_t (1) < numInputEnt) {
1876 os <<
"]" << std::endl
1878 for (
size_t k = 0; k < numInputEnt; ++k) {
1880 if (k +
size_t (1) < numInputEnt) {
1884 os <<
"]" << std::endl;
// If row views are supported, append the row's current contents.
1886 if (this->supportsRowViews ()) {
1887 values_host_view_type vals2;
1888 if (this->isGloballyIndexed ()) {
1889 global_inds_host_view_type gblColInds2;
1890 const GlobalOrdinal gblRow =
1891 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1893 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1894 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
// Fetching the view may itself throw; record that in the report
// rather than losing the original diagnostic.
1898 bool getViewThrew =
false;
1900 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1902 catch (std::exception& e) {
1903 getViewThrew =
true;
1904 os <<
"getGlobalRowView threw exception:" << std::endl
1905 << e.what () << std::endl;
1907 if (! getViewThrew) {
1908 os <<
"\tNew global column indices: ";
1909 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1910 os << gblColInds2[jjj] <<
" ";
1912 os <<
"\tNew values: ";
1913 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1914 os << vals2[jjj] <<
" ";
// Locally indexed matrix: print local column indices and values.
1919 else if (this->isLocallyIndexed ()) {
1920 local_inds_host_view_type lclColInds2;
1921 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1922 os <<
"\tNew local column indices: ";
1923 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1924 os << lclColInds2[jjj] <<
" ";
1926 os <<
"\tNew values: ";
1927 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1928 os << vals2[jjj] <<
" ";
1933 os <<
"Please report this bug to the Tpetra developers.";
1934 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1935 (
true, std::logic_error, os.str ());
1937 #endif // HAVE_TPETRA_DEBUG
// insertGlobalValues (ArrayView overload): inserts (global column index,
// value) pairs into the row with global index gblRow.
//
// Visible behavior:
//  - In HAVE_TPETRA_DEBUG builds, values and indices must have the same
//    size (std::runtime_error otherwise).
//  - When the local row index equals OTLO::invalid(), the entries are
//    passed to insertNonownedGlobalValues.
//  - A static graph causes std::runtime_error.
//  - When the graph has a column Map and some input index is flagged as
//    not being in it, std::invalid_argument is thrown.
//  - Otherwise the entries go to insertGlobalValuesImpl.
//
// NOTE(review): several lines of this definition are not visible in this
// view; the comments describe only what the visible lines show.
1940 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1944 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1945 const Teuchos::ArrayView<const Scalar>& values)
1947 using Teuchos::toString;
1950 typedef LocalOrdinal LO;
1951 typedef GlobalOrdinal GO;
1952 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
1953 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
1954 const char tfecfFuncName[] =
"insertGlobalValues: ";
1956 #ifdef HAVE_TPETRA_DEBUG
1957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1958 (values.size () != indices.size (), std::runtime_error,
1959 "values.size() = " << values.size () <<
" != indices.size() = "
1960 << indices.size () <<
".");
1961 #endif // HAVE_TPETRA_DEBUG
1965 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
// Invalid local row index: hand the entries to the nonowned-insert path.
1968 if (lclRow == OTLO::invalid ()) {
1975 this->insertNonownedGlobalValues (gblRow, indices, values);
// A static graph cannot take new entries.
1978 if (this->isStaticGraph ()) {
1980 const int myRank = rowMap.getComm ()->getRank ();
1981 const int numProcs = rowMap.getComm ()->getSize ();
1982 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1983 (
true, std::runtime_error,
1984 "The matrix was constructed with a constant (\"static\") graph, "
1985 "yet the given global row index " << gblRow <<
" is in the row "
1986 "Map on the calling process (with rank " << myRank <<
", of " <<
1987 numProcs <<
" process(es)). In this case, you may not insert "
1988 "new entries into rows owned by the calling process.");
// Raw pointers to the inputs; values are reinterpreted as IST.
1992 const IST*
const inputVals =
1993 reinterpret_cast<const IST*
> (values.getRawPtr ());
1994 const GO*
const inputGblColInds = indices.getRawPtr ();
1995 const size_t numInputEnt = indices.size ();
// With a column Map present, the input column indices are validated.
// The list of offending indices is only collected in debug builds.
2004 if (! graph.
colMap_.is_null ()) {
2010 #ifdef HAVE_TPETRA_DEBUG
2011 Teuchos::Array<GO> badColInds;
2012 #endif // HAVE_TPETRA_DEBUG
2013 const size_type numEntriesToInsert = indices.size ();
2014 bool allInColMap =
true;
2015 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2017 allInColMap =
false;
2018 #ifdef HAVE_TPETRA_DEBUG
2019 badColInds.push_back (indices[k]);
2022 #endif // HAVE_TPETRA_DEBUG
2025 if (! allInColMap) {
2026 std::ostringstream os;
2027 os <<
"You attempted to insert entries in owned row " << gblRow
2028 <<
", at the following column indices: " << toString (indices)
2030 #ifdef HAVE_TPETRA_DEBUG
2031 os <<
"Of those, the following indices are not in the column Map "
2032 "on this process: " << toString (badColInds) <<
"." << endl
2033 <<
"Since the matrix has a column Map already, it is invalid "
2034 "to insert entries at those locations.";
2036 os <<
"At least one of those indices is not in the column Map "
2037 "on this process." << endl <<
"It is invalid to insert into "
2038 "columns not in the column Map on the process that owns the "
2040 #endif // HAVE_TPETRA_DEBUG
2041 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2042 (
true, std::invalid_argument, os.str ());
// All checks passed: perform the insertion.
2046 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2047 inputVals, numInputEnt);
// insertGlobalValues (raw-array overload): wraps the raw arrays in
// non-owning Teuchos::ArrayView objects of length numEnt and forwards to
// the ArrayView overload.
//
// NOTE(review): the declarator lines (function name and the row argument)
// are not visible in this view.
//
// Visible parameters:
//   numEnt - number of entries in vals and inds.
//   vals   - values to insert.
//   inds   - global column indices, parallel to vals.
2052 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2056 const LocalOrdinal numEnt,
2057 const Scalar vals[],
2058 const GlobalOrdinal inds[])
// The views do not copy; they reference the caller's arrays.
2060 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2061 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2062 this->insertGlobalValues (globalRow, indsT, valsT);
// insertGlobalValuesFiltered: like insertGlobalValues, but input column
// indices that are not in the graph's column Map are skipped rather than
// treated as an error.
//
// Visible behavior:
//  - values and indices must have the same size (std::runtime_error).
//  - When the local row index equals OTLO::invalid(), the entries are
//    passed to insertNonownedGlobalValues.
//  - A static graph causes std::runtime_error.
//  - Column Map present and graph locally indexed: the input is scanned
//    in runs; the indices that map to valid local indices are inserted
//    with insertLocalValues.
//  - Column Map present otherwise: runs of indices found in the column
//    Map are inserted with insertGlobalValuesImpl.
//  - No column Map: everything goes to insertGlobalValuesImpl unfiltered.
//
// NOTE(review): several lines of this definition are not visible in this
// view; the comments describe only what the visible lines show.
2066 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2070 const GlobalOrdinal gblRow,
2071 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2072 const Teuchos::ArrayView<const Scalar>& values,
2075 typedef impl_scalar_type IST;
2076 typedef LocalOrdinal LO;
2077 typedef GlobalOrdinal GO;
2078 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2079 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
2082 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2083 (values.size () != indices.size (), std::runtime_error,
2084 "values.size() = " << values.size () <<
" != indices.size() = "
2085 << indices.size () <<
".");
// Translate the global row index; an invalid result means the row is
// not in the row Map on this process.
2090 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2091 const LO lclRow = rowMap.getLocalElement (gblRow);
2092 if (lclRow == OTLO::invalid ()) {
2099 this->insertNonownedGlobalValues (gblRow, indices, values);
// A static graph cannot take new entries.
2102 if (this->isStaticGraph ()) {
2104 const int myRank = rowMap.getComm ()->getRank ();
2105 const int numProcs = rowMap.getComm ()->getSize ();
2106 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2107 (
true, std::runtime_error,
2108 "The matrix was constructed with a constant (\"static\") graph, "
2109 "yet the given global row index " << gblRow <<
" is in the row "
2110 "Map on the calling process (with rank " << myRank <<
", of " <<
2111 numProcs <<
" process(es)). In this case, you may not insert "
2112 "new entries into rows owned by the calling process.");
// Raw pointers to the inputs; values are reinterpreted as IST.
2115 crs_graph_type& graph = * (this->myGraph_);
2116 const IST*
const inputVals =
2117 reinterpret_cast<const IST*
> (values.getRawPtr ());
2118 const GO*
const inputGblColInds = indices.getRawPtr ();
2119 const size_t numInputEnt = indices.size ();
2120 RowInfo rowInfo = graph.getRowInfo (lclRow);
// Case 1: column Map present and graph locally indexed.  Global indices
// are converted to local ones and inserted through insertLocalValues.
2122 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2129 const map_type& colMap = * (graph.colMap_);
2130 size_t curOffset = 0;
2131 while (curOffset < numInputEnt) {
// Collect the local indices of the current run.
2135 Teuchos::Array<LO> lclIndices;
2136 size_t endOffset = curOffset;
2137 for ( ; endOffset < numInputEnt; ++endOffset) {
2138 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2139 if (lclIndex != OTLO::invalid())
2140 lclIndices.push_back(lclIndex);
2147 const LO numIndInSeq = (endOffset - curOffset);
2148 if (numIndInSeq != 0) {
2149 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
// Invariant: the scan stopped at the end of the input or at an index
// that is not in the column Map.
2155 const bool invariant = endOffset == numInputEnt ||
2156 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2157 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2158 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
// Skip the index that ended the run.
2160 curOffset = endOffset + 1;
// Case 2: column Map present (graph not locally indexed).  Runs of
// indices found in the column Map are inserted as global indices.
2163 else if (! graph.colMap_.is_null ()) {
2164 const map_type& colMap = * (graph.colMap_);
2165 size_t curOffset = 0;
2166 while (curOffset < numInputEnt) {
2170 size_t endOffset = curOffset;
2171 for ( ; endOffset < numInputEnt &&
2172 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2178 const LO numIndInSeq = (endOffset - curOffset);
2179 if (numIndInSeq != 0) {
// Refresh rowInfo before each insertion into the same row.
2180 rowInfo = graph.getRowInfo(lclRow);
2181 this->insertGlobalValuesImpl (graph, rowInfo,
2182 inputGblColInds + curOffset,
2183 inputVals + curOffset,
2190 const bool invariant = endOffset == numInputEnt ||
2191 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2192 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2193 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2195 curOffset = endOffset + 1;
// Case 3: no column Map, so nothing to filter against.
2199 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2200 inputVals, numInputEnt);
// insertGlobalValuesFilteredChecked: calls insertGlobalValuesFiltered and,
// if it throws a std::exception, rethrows as std::runtime_error with a
// message that includes the original what() text.
//
// Visible parameters:
//   gblRow  - global row index, forwarded and printed in the message.
//   indices - global column indices, forwarded.
//   values  - values, forwarded.
//   prefix  - C string placed at the start of the detailed message.
//
// NOTE(review): several lines of this definition are not visible in this
// view; the comments describe only what the visible lines show.
2205 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2207 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2208 insertGlobalValuesFilteredChecked(
2209 const GlobalOrdinal gblRow,
2210 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2211 const Teuchos::ArrayView<const Scalar>& values,
2212 const char*
const prefix,
2220 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2222 catch(std::exception& e) {
2223 std::ostringstream os;
2225 const size_t maxNumToPrint =
// prefix is a C string, so stream the pointer itself: dereferencing it
// (*prefix) would print only its first character.  A null prefix is
// printed as an empty string instead of being dereferenced.
2227 os << (prefix == nullptr ? "" : prefix) <<
": insertGlobalValuesFiltered threw an "
2228 "exception: " << e.what() << endl
2229 <<
"Global row index: " << gblRow << endl;
2237 os <<
": insertGlobalValuesFiltered threw an exception: "
2240 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// Replaces existing values in one row, given numElts input column indices
// and values.
//
// NOTE(review): the declarator lines carrying the function name and the
// leading parameters (rowVals, graph, rowInfo, newVals) are not visible in
// this view; this is presumably replaceLocalValuesImpl -- confirm against
// the header.
//
// Two loops are visible.  In both, an input entry whose column is found in
// the row (offset != rowInfo.numEntries) overwrites the stored value;
// entries that are not found are left out.  Between the loops, a null
// column Map makes the function return
// Teuchos::OrdinalTraits<LO>::invalid().
2244 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2250 const LocalOrdinal inds[],
2252 const LocalOrdinal numElts)
2254 typedef LocalOrdinal LO;
2255 typedef GlobalOrdinal GO;
// Passed to findRelOffset together with a search hint.
2256 const bool sorted = graph.
isSorted ();
// First loop: look up each input local column index directly.
2266 for (LO j = 0; j < numElts; ++j) {
2267 const LO lclColInd = inds[j];
2268 const size_t offset =
2269 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2270 lclColInd, hint, sorted);
// offset == numEntries signals "not found".
2271 if (offset != rowInfo.numEntries) {
2272 rowVals[offset] = newVals[j];
// Without a column Map the second loop cannot run.
2279 if (graph.
colMap_.is_null ()) {
2280 return Teuchos::OrdinalTraits<LO>::invalid ();
// Second loop: look up a global column index for each input entry,
// skipping entries whose global index is invalid.
2288 for (LO j = 0; j < numElts; ++j) {
2290 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2291 const size_t offset =
2292 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2293 gblColInd, hint, sorted);
2294 if (offset != rowInfo.numEntries) {
2295 rowVals[offset] = newVals[j];
// replaceLocalValues (ArrayView overload): checks that lclCols and vals
// have the same length, then forwards their raw pointers to the raw-array
// overload.
//
// NOTE(review): the declarator lines (return type, function name and the
// row argument) are not visible in this view.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the two views differ
// in length; otherwise whatever the forwarded call returns.
2314 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2318 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2319 const Teuchos::ArrayView<const Scalar>& vals)
2321 typedef LocalOrdinal LO;
// Both sizes are converted to LO before comparison.
2323 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
2324 if (static_cast<LO> (vals.size ()) != numInputEnt) {
2325 return Teuchos::OrdinalTraits<LO>::invalid ();
2327 const LO*
const inputInds = lclCols.getRawPtr ();
2328 const Scalar*
const inputVals = vals.getRawPtr ();
2329 return this->replaceLocalValues (localRow, numInputEnt,
2330 inputVals, inputInds);
// replaceLocalValues (Kokkos::View overload): checks that inputInds and
// inputVals have the same extent, then forwards their data pointers to
// the raw-array overload.
//
// NOTE(review): the declarator lines (return type, function name, the row
// argument and the LO alias) are not visible in this view.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() when the two views differ
// in extent; otherwise whatever the forwarded call returns.
2333 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2339 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2340 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2343 const LO numInputEnt = inputInds.extent(0);
2344 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2345 return Teuchos::OrdinalTraits<LO>::invalid();
// The forwarded overload takes Scalar, so the impl_scalar_type data
// pointer is reinterpreted.
2347 const Scalar*
const inVals =
2348 reinterpret_cast<const Scalar*
>(inputVals.data());
2349 return this->replaceLocalValues(localRow, numInputEnt,
2350 inVals, inputInds.data());
// replaceLocalValues overload taking raw arrays (name inferred from the
// callers above; the name line itself is not visible in this excerpt).
// Visible return values: invalid LO when fill is not active or there is no
// static graph; 0 when the row lookup yields an invalid local row;
// otherwise whatever replaceLocalValuesImpl returns.
2353 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2357 const LocalOrdinal numEnt,
2358 const Scalar inputVals[],
2359 const LocalOrdinal inputCols[])
2362 typedef LocalOrdinal LO;
2364 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2366 return Teuchos::OrdinalTraits<LO>::invalid ();
// `graph` and `rowInfo` are defined on lines not shown in this excerpt.
2371 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2374 return static_cast<LO
> (0);
// Host view of this row's values, modified in place by the Impl helper.
2376 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2377 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2378 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2379 inputCols, inVals, numEnt);
// Row-level "replace" helper that takes global column indices.
// NOTE(review): the name and most of the body are not visible; presumably
// replaceGlobalValuesImpl -- confirm against the full file.
// Visible logic: the input indices are wrapped in an ArrayView, and a
// callback is built that copies newVals[k] into rowVals[offset].
2382 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2388 const GlobalOrdinal inds[],
2390 const LocalOrdinal numElts)
2392 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
// Callback arguments: k = position in the input, offset = position in the
// row's storage; the middle argument is unused.
2394 [&](
size_t const k,
size_t const ,
size_t const offset) {
2395 rowVals[offset] = newVals[k];
// std::ref wraps `fun` by reference inside the std::function.
// How `cb` is consumed is on lines not shown here.
2397 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
// replaceGlobalValues overload taking Teuchos::ArrayView inputs.
// Returns the invalid LO value when the two views differ in length;
// otherwise forwards raw pointers to the (row, count, values, indices)
// overload.
2401 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2405 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2406 const Teuchos::ArrayView<const Scalar>& inputVals)
2408 typedef LocalOrdinal LO;
2410 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2411 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2412 return Teuchos::OrdinalTraits<LO>::invalid ();
2414 return this->replaceGlobalValues (globalRow, numInputEnt,
2415 inputVals.getRawPtr (),
2416 inputGblColInds.getRawPtr ());
// replaceGlobalValues overload taking raw arrays (name inferred from the
// ArrayView overload's forwarding call; the name line is not visible).
// Visible return values: invalid LO when fill is not active or there is no
// static graph; 0 when the row lookup yields an invalid local row;
// otherwise whatever replaceGlobalValuesImpl returns.
2419 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2423 const LocalOrdinal numEnt,
2424 const Scalar inputVals[],
2425 const GlobalOrdinal inputGblColInds[])
2428 typedef LocalOrdinal LO;
2430 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2432 return Teuchos::OrdinalTraits<LO>::invalid ();
// `graph` and `rowInfo` are defined on lines not shown in this excerpt.
2437 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2440 return static_cast<LO
> (0);
// Host view of this row's values, modified in place by the Impl helper.
2443 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2444 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2445 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2446 inputGblColInds, inVals, numEnt);
// replaceGlobalValues overload taking Kokkos::View inputs (AnonymousSpace).
// Returns the invalid LO value when the two views have different extents;
// otherwise forwards to the raw-array overload (the final argument of that
// call is on a line not shown in this excerpt).
2449 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2455 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2456 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2465 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
2466 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2467 return Teuchos::OrdinalTraits<LO>::invalid();
// The view stores impl_scalar_type; the raw-pointer overload takes Scalar.
2469 const Scalar*
const inVals =
2470 reinterpret_cast<const Scalar*
>(inputVals.data());
2471 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
// Row-level "sum into" helper that takes global column indices.
// NOTE(review): the name and several lines are not visible; presumably
// sumIntoGlobalValuesImpl -- confirm against the full file.
// For each input entry found in the row, newVals[j] is added to the stored
// value, either with Kokkos::atomic_add or with a plain +=. The condition
// choosing between the two is on lines not shown (presumably an `atomic`
// flag -- TODO confirm).
2475 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2481 const GlobalOrdinal inds[],
2483 const LocalOrdinal numElts,
2486 typedef LocalOrdinal LO;
2487 typedef GlobalOrdinal GO;
2489 const bool sorted = graph.
isSorted ();
// The handling of a missing column Map is on lines not shown here.
2498 if (graph.
colMap_.is_null ()) {
2509 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
// First visible loop: searches by a local index (lclColInd is computed on a
// line not shown) and skips inputs whose local index is invalid.
2511 for (LO j = 0; j < numElts; ++j) {
2513 if (lclColInd != LINV) {
2514 const size_t offset =
2515 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2516 lclColInd, hint, sorted);
// A result equal to rowInfo.numEntries is treated as "not found".
2517 if (offset != rowInfo.numEntries) {
2519 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2522 rowVals[offset] += newVals[j];
// Second visible loop: the input global index is used directly as the key.
2535 for (LO j = 0; j < numElts; ++j) {
2536 const GO gblColInd = inds[j];
2537 const size_t offset =
2538 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2539 gblColInd, hint, sorted);
2540 if (offset != rowInfo.numEntries) {
2542 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2545 rowVals[offset] += newVals[j];
// sumIntoGlobalValues overload taking Teuchos::ArrayView inputs.
// Returns the invalid LO value when the two views differ in length;
// otherwise forwards raw pointers to the raw-array overload (the trailing
// argument of that call is on a line not shown in this excerpt).
2559 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2563 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2564 const Teuchos::ArrayView<const Scalar>& inputVals,
2567 typedef LocalOrdinal LO;
2569 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2570 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2571 return Teuchos::OrdinalTraits<LO>::invalid ();
2573 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2574 inputVals.getRawPtr (),
2575 inputGblColInds.getRawPtr (),
// sumIntoGlobalValues overload taking raw arrays (name inferred from the
// ArrayView overload's forwarding call; the name line is not visible).
// Returns the invalid LO value when fill is not active or there is no
// static graph. When the row lookup yields an invalid local row, the input
// is handed to insertNonownedGlobalValues; otherwise the sums are applied
// through sumIntoGlobalValuesImpl.
2579 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2583 const LocalOrdinal numInputEnt,
2584 const Scalar inputVals[],
2585 const GlobalOrdinal inputGblColInds[],
2589 typedef LocalOrdinal LO;
2590 typedef GlobalOrdinal GO;
2592 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2594 return Teuchos::OrdinalTraits<LO>::invalid ();
// `graph` and `rowInfo` are defined on lines not shown in this excerpt.
2599 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2604 using Teuchos::ArrayView;
// The ArrayViews are built with a null pointer when there are no entries.
2605 ArrayView<const GO> inputGblColInds_av(
2606 numInputEnt == 0 ?
nullptr : inputGblColInds,
2608 ArrayView<const Scalar> inputVals_av(
2609 numInputEnt == 0 ?
nullptr :
2610 inputVals, numInputEnt);
2615 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
// Path for a valid local row: update the row's host values in place.
2626 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2627 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2628 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2629 inputGblColInds, inVals,
2630 numInputEnt, atomic);
// transformLocalValues overload addressed by local row index (the name line
// is not visible; the forwarding call below names the function).
// Visible return values: invalid LO when fill is not active or there is no
// static graph; 0 when the row lookup yields an invalid local row;
// otherwise the result of the row-data overload, which receives the binary
// function f.
2634 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2638 const LocalOrdinal numInputEnt,
2639 const impl_scalar_type inputVals[],
2640 const LocalOrdinal inputCols[],
2641 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2644 using Tpetra::Details::OrdinalTraits;
2645 typedef LocalOrdinal LO;
2647 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2649 return Teuchos::OrdinalTraits<LO>::invalid ();
2651 const crs_graph_type& graph = * (this->staticGraph_);
2652 const RowInfo rowInfo = graph.getRowInfo (lclRow);
2654 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2657 return static_cast<LO
> (0);
// Host view of this row's values, modified in place by the callee.
2659 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2660 return this->transformLocalValues (curRowVals.data (), graph,
2661 rowInfo, inputCols, inputVals,
2662 numInputEnt, f, atomic);
// transformGlobalValues overload addressed by global row index.
// Visible return values: invalid LO when fill is not active or there is no
// static graph; 0 when the row lookup yields an invalid local row;
// otherwise the result of the row-data overload, which receives the binary
// function f.
2665 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2667 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2668 transformGlobalValues (
const GlobalOrdinal gblRow,
2669 const LocalOrdinal numInputEnt,
2670 const impl_scalar_type inputVals[],
2671 const GlobalOrdinal inputCols[],
2672 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2675 using Tpetra::Details::OrdinalTraits;
2676 typedef LocalOrdinal LO;
2678 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2680 return OrdinalTraits<LO>::invalid ();
2682 const crs_graph_type& graph = * (this->staticGraph_);
2683 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2685 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2688 return static_cast<LO
> (0);
// Host view of this row's values, modified in place by the callee.
2690 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2691 return this->transformGlobalValues (curRowVals.data (), graph,
2692 rowInfo, inputCols, inputVals,
2693 numInputEnt, f, atomic);
// transformLocalValues overload that works on one row's data: for each
// input local column index found in the row, the stored value is combined
// with newVals[j] through f.
// Two update forms are visible: atomic_binary_function_update and a plain
// assignment of f(old, new). The condition choosing between them is on
// lines not shown (presumably an `atomic` flag -- TODO confirm).
2696 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2698 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2699 transformLocalValues (impl_scalar_type rowVals[],
2700 const crs_graph_type& graph,
2701 const RowInfo& rowInfo,
2702 const LocalOrdinal inds[],
2703 const impl_scalar_type newVals[],
2704 const LocalOrdinal numElts,
2705 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2708 typedef impl_scalar_type ST;
2709 typedef LocalOrdinal LO;
2710 typedef GlobalOrdinal GO;
2717 const bool sorted = graph.isSorted ();
// Locally indexed graph: input indices are searched for directly.
2722 if (graph.isLocallyIndexed ()) {
2725 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2727 for (LO j = 0; j < numElts; ++j) {
2728 const LO lclColInd = inds[j];
2729 const size_t offset =
2730 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2731 lclColInd, hint, sorted);
// A result equal to rowInfo.numEntries is treated as "not found".
2732 if (offset != rowInfo.numEntries) {
2741 ST*
const dest = &rowVals[offset];
2742 (void) atomic_binary_function_update (dest, newVals[j], f);
2746 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Globally indexed graph: each input local index is converted to a global
// index through the column Map before searching.
2753 else if (graph.isGloballyIndexed ()) {
// The handling of a missing column Map is on lines not shown here.
2757 if (graph.colMap_.is_null ()) {
2764 const map_type& colMap = * (graph.colMap_);
2767 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2769 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2770 for (LO j = 0; j < numElts; ++j) {
2771 const GO gblColInd = colMap.getGlobalElement (inds[j]);
// Inputs whose converted index is invalid are skipped.
2772 if (gblColInd != GINV) {
2773 const size_t offset =
2774 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2775 gblColInd, hint, sorted);
2776 if (offset != rowInfo.numEntries) {
2785 ST*
const dest = &rowVals[offset];
2786 (void) atomic_binary_function_update (dest, newVals[j], f);
2790 rowVals[offset] = f (rowVals[offset], newVals[j]);
// transformGlobalValues overload that works on one row's data: for each
// input global column index found in the row, the stored value is combined
// with newVals[j] through f.
// Two update forms are visible: atomic_binary_function_update and a plain
// assignment of f(old, new). The condition choosing between them is on
// lines not shown (presumably an `atomic` flag -- TODO confirm).
2805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2807 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2808 transformGlobalValues (impl_scalar_type rowVals[],
2809 const crs_graph_type& graph,
2810 const RowInfo& rowInfo,
2811 const GlobalOrdinal inds[],
2812 const impl_scalar_type newVals[],
2813 const LocalOrdinal numElts,
2814 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2817 typedef impl_scalar_type ST;
2818 typedef LocalOrdinal LO;
2819 typedef GlobalOrdinal GO;
2826 const bool sorted = graph.isSorted ();
// Globally indexed graph: input indices are searched for directly.
2831 if (graph.isGloballyIndexed ()) {
2834 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2836 for (LO j = 0; j < numElts; ++j) {
2837 const GO gblColInd = inds[j];
2838 const size_t offset =
2839 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2840 gblColInd, hint, sorted);
// A result equal to rowInfo.numEntries is treated as "not found".
2841 if (offset != rowInfo.numEntries) {
2850 ST*
const dest = &rowVals[offset];
2851 (void) atomic_binary_function_update (dest, newVals[j], f);
2855 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Locally indexed graph: each input global index is converted to a local
// index through the column Map before searching.
2862 else if (graph.isLocallyIndexed ()) {
// The handling of a missing column Map is on lines not shown here.
2866 if (graph.colMap_.is_null ()) {
2872 const map_type& colMap = * (graph.colMap_);
2875 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2877 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2878 for (LO j = 0; j < numElts; ++j) {
2879 const LO lclColInd = colMap.getLocalElement (inds[j]);
// Inputs whose converted index is invalid are skipped.
2880 if (lclColInd != LINV) {
2881 const size_t offset =
2882 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2883 lclColInd, hint, sorted);
2884 if (offset != rowInfo.numEntries) {
2893 ST*
const dest = &rowVals[offset];
2894 (void) atomic_binary_function_update (dest, newVals[j], f);
2898 rowVals[offset] = f (rowVals[offset], newVals[j]);
// Row-level "sum into" helper that takes local column indices.
// NOTE(review): the name and several lines are not visible; presumably
// sumIntoLocalValuesImpl -- confirm against the full file.
// For each input entry found in the row, newVals[j] is added to the stored
// value, either with Kokkos::atomic_add or with a plain +=. The condition
// choosing between the two is on lines not shown (presumably an `atomic`
// flag -- TODO confirm).
2913 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2919 const LocalOrdinal inds[],
2921 const LocalOrdinal numElts,
2924 typedef LocalOrdinal LO;
2925 typedef GlobalOrdinal GO;
2927 const bool sorted = graph.
isSorted ();
// First visible loop: the input local index is used directly as search key.
2937 for (LO j = 0; j < numElts; ++j) {
2938 const LO lclColInd = inds[j];
2939 const size_t offset =
2940 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2941 lclColInd, hint, sorted);
// A result equal to rowInfo.numEntries is treated as "not found".
2942 if (offset != rowInfo.numEntries) {
2944 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2947 rowVals[offset] += newVals[j];
// Without a column Map the helper returns the invalid LO value.
2955 if (graph.
colMap_.is_null ()) {
2956 return Teuchos::OrdinalTraits<LO>::invalid ();
// Second visible loop: searches by a global index (gblColInd is computed on
// a line not shown here) and skips inputs whose global index is invalid.
2964 for (LO j = 0; j < numElts; ++j) {
2966 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2967 const size_t offset =
2968 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2969 gblColInd, hint, sorted);
2970 if (offset != rowInfo.numEntries) {
2972 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2975 rowVals[offset] += newVals[j];
// sumIntoLocalValues overload taking Teuchos::ArrayView inputs.
// Returns the invalid LO value when the two views differ in length;
// otherwise forwards raw pointers and the `atomic` argument to the
// raw-array overload.
2995 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2999 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3000 const Teuchos::ArrayView<const Scalar>& values,
3004 const LO numInputEnt =
static_cast<LO
>(indices.size());
3005 if (static_cast<LO>(values.size()) != numInputEnt) {
3006 return Teuchos::OrdinalTraits<LO>::invalid();
3008 const LO*
const inputInds = indices.getRawPtr();
3009 const scalar_type*
const inputVals = values.getRawPtr();
3010 return this->sumIntoLocalValues(localRow, numInputEnt,
3011 inputVals, inputInds, atomic);
// sumIntoLocalValues overload taking Kokkos::View inputs (AnonymousSpace).
// Returns the invalid LO value when the two views have different extents;
// otherwise forwards the raw data pointers and the `atomic` argument to the
// raw-array overload.
3014 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3020 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3021 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3025 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
3026 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3027 return Teuchos::OrdinalTraits<LO>::invalid();
// The declaration of `inVals` begins on a line not shown; its initializer
// reinterprets the view's impl_scalar_type data as scalar_type.
3030 reinterpret_cast<const scalar_type*
>(inputVals.data());
3031 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3032 inputInds.data(), atomic);
// sumIntoLocalValues overload taking raw arrays (name inferred from the
// forwarding overloads; the name line is not visible in this excerpt).
// Visible return values: invalid LO when fill is not active or there is no
// static graph; 0 when the row lookup yields an invalid local row;
// otherwise whatever sumIntoLocalValuesImpl returns.
3035 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3039 const LocalOrdinal numEnt,
3040 const Scalar vals[],
3041 const LocalOrdinal cols[],
3045 typedef LocalOrdinal LO;
3047 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3049 return Teuchos::OrdinalTraits<LO>::invalid ();
// `graph` and `rowInfo` are defined on lines not shown in this excerpt.
3054 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3057 return static_cast<LO
> (0);
// Host view of this row's values, modified in place by the Impl helper.
3059 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
3060 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3061 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3062 cols, inputVals, numEnt, atomic);
// Returns a read-only host view of one row's values.
// NOTE(review): the name line is not visible; presumably getValuesViewHost.
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty; otherwise a host subview starting at
// rowinfo.offset1D (remaining arguments are on lines not shown).
3065 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3067 values_dualv_type::t_host::const_type
3071 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3072 return typename values_dualv_type::t_host::const_type ();
3074 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a writable host view of one row's values.
// NOTE(review): the name line is not visible; presumably
// getValuesViewHostNonConst, which the callers above use.
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty; otherwise a host subview starting at
// rowinfo.offset1D (remaining arguments are on lines not shown).
3079 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3081 values_dualv_type::t_host
3085 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3086 return typename values_dualv_type::t_host ();
3088 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
// Returns a read-only device view of one row's values.
// NOTE(review): the name line is not visible; presumably
// getValuesViewDevice.
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty; otherwise a device subview starting at
// rowinfo.offset1D (remaining arguments are on lines not shown).
3093 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3095 values_dualv_type::t_dev::const_type
3099 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3100 return typename values_dualv_type::t_dev::const_type ();
3102 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// Returns a writable device view of one row's values.
// NOTE(review): the name line is not visible; presumably
// getValuesViewDeviceNonConst.
// An empty view is returned when the row has no allocation or the unpacked
// values storage is empty; otherwise a device subview starting at
// rowinfo.offset1D (remaining arguments are on lines not shown).
3107 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3109 values_dualv_type::t_dev
3113 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3114 return typename values_dualv_type::t_dev ();
3116 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
// getLocalRowCopy: copies one row's local column indices and values into
// caller-provided host views and reports the entry count in numEntries.
// Throws std::runtime_error when the matrix has no column Map, or when
// either output view is smaller than the row's entry count.
3122 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3126 nonconst_local_inds_host_view_type &indices,
3127 nonconst_values_host_view_type &values,
3128 size_t& numEntries)
const
3130 using Teuchos::ArrayView;
3131 using Teuchos::av_reinterpret_cast;
3132 const char tfecfFuncName[] =
"getLocalRowCopy: ";
3134 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3135 (! this->hasColMap (), std::runtime_error,
3136 "The matrix does not have a column Map yet. This means we don't have "
3137 "local indices for columns yet, so it doesn't make sense to call this "
3138 "method. If the matrix doesn't have a column Map yet, you should call "
3139 "fillComplete on it first.");
3141 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3142 const size_t theNumEntries = rowinfo.numEntries;
3143 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3144 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3145 static_cast<size_t> (values.size ()) < theNumEntries,
3146 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3147 theNumEntries <<
" entry/ies, but indices.size() = " <<
3148 indices.size () <<
" and values.size() = " << values.size () <<
".");
// The count is written before the copy, including for an invalid row.
3149 numEntries = theNumEntries;
3151 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed storage: indices and values are copied as stored.
3152 if (staticGraph_->isLocallyIndexed ()) {
3153 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3154 auto curVals = getValuesViewHost(rowinfo);
3156 for (
size_t j = 0; j < theNumEntries; ++j) {
3157 values[j] = curVals[j];
3158 indices[j] = curLclInds(j);
// Globally indexed storage: only the value copy is visible here; the index
// conversion (presumably through colMap -- TODO confirm) is on lines not
// shown in this excerpt.
3161 else if (staticGraph_->isGloballyIndexed ()) {
3163 const map_type& colMap = * (staticGraph_->colMap_);
3164 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3165 auto curVals = getValuesViewHost(rowinfo);
3167 for (
size_t j = 0; j < theNumEntries; ++j) {
3168 values[j] = curVals[j];
// getGlobalRowCopy: copies one row's global column indices and values into
// caller-provided host views and reports the entry count in numEntries.
// Throws std::runtime_error when either output view is smaller than the
// row's entry count.
3176 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3180 nonconst_global_inds_host_view_type &indices,
3181 nonconst_values_host_view_type &values,
3182 size_t& numEntries)
const
3184 using Teuchos::ArrayView;
3185 using Teuchos::av_reinterpret_cast;
3186 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
// The declaration of `rowinfo` begins on a line not shown; this is its
// initializer.
3189 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3190 const size_t theNumEntries = rowinfo.numEntries;
3191 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3192 static_cast<size_t> (indices.size ()) < theNumEntries ||
3193 static_cast<size_t> (values.size ()) < theNumEntries,
3194 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3195 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3196 indices.size () <<
" and values.size() = " << values.size () <<
".");
// The count is written before the copy, including for an invalid row.
3197 numEntries = theNumEntries;
3199 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
// Locally indexed storage: only the value copy is visible here; the index
// conversion (presumably through colMap -- TODO confirm) is on lines not
// shown in this excerpt.
3200 if (staticGraph_->isLocallyIndexed ()) {
3201 const map_type& colMap = * (staticGraph_->colMap_);
3202 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3203 auto curVals = getValuesViewHost(rowinfo);
3205 for (
size_t j = 0; j < theNumEntries; ++j) {
3206 values[j] = curVals[j];
// Globally indexed storage: indices and values are copied as stored.
3210 else if (staticGraph_->isGloballyIndexed ()) {
3211 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3212 auto curVals = getValuesViewHost(rowinfo);
3214 for (
size_t j = 0; j < theNumEntries; ++j) {
3215 values[j] = curVals[j];
3216 indices[j] = curGblInds(j);
// getLocalRowView: points `indices` and `values` at host subviews of one
// row's local column indices and values (no copy is made in the visible
// code). Throws std::runtime_error when the matrix is globally indexed.
3223 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3227 local_inds_host_view_type &indices,
3228 values_host_view_type &values)
const
3230 const char tfecfFuncName[] =
"getLocalRowView: ";
3232 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3233 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3234 "its indices as global indices, so you cannot get a view with local "
3235 "column indices. If the matrix has a column Map, you may call "
3236 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3237 "a view with global column indices by calling getGlobalRowCopy().");
3239 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
// Valid, non-empty row: take subviews starting at the row's 1-D offset
// (the remaining subview arguments are on lines not shown).
3240 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3241 rowInfo.numEntries > 0) {
3242 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3246 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Presumably the else branch (its opening line is not shown): both outputs
// are reset to empty views.
3253 indices = local_inds_host_view_type();
3254 values = values_host_view_type();
// Debug-only consistency checks; each throws std::logic_error on failure.
3257 #ifdef HAVE_TPETRA_DEBUG
3258 const char suffix[] =
". This should never happen. Please report this "
3259 "bug to the Tpetra developers.";
3260 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3261 (static_cast<size_t> (indices.size ()) !=
3262 static_cast<size_t> (values.size ()), std::logic_error,
3263 "At the end of this method, for local row " << localRow <<
", "
3264 "indices.size() = " << indices.size () <<
" != values.size () = "
3265 << values.size () << suffix);
3266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3267 (static_cast<size_t> (indices.size ()) !=
3268 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3269 "At the end of this method, for local row " << localRow <<
", "
3270 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3271 << rowInfo.numEntries << suffix);
3272 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3273 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3274 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3275 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3276 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3277 expectedNumEntries << suffix);
3278 #endif // HAVE_TPETRA_DEBUG
// getGlobalRowView: points `indices` and `values` at host subviews of one
// row's global column indices and values (no copy is made in the visible
// code). Throws std::runtime_error when the matrix is locally indexed.
3282 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3286 global_inds_host_view_type &indices,
3287 values_host_view_type &values)
const
3289 const char tfecfFuncName[] =
"getGlobalRowView: ";
3291 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3292 isLocallyIndexed (), std::runtime_error,
3293 "The matrix is locally indexed, so we cannot return a view of the row "
3294 "with global column indices. Use getGlobalRowCopy() instead.");
// The declaration of `rowInfo` begins on a line not shown; this is its
// initializer.
3299 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
// Valid, non-empty row: take subviews starting at the row's 1-D offset
// (the remaining subview arguments are on lines not shown).
3300 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3301 rowInfo.numEntries > 0) {
3302 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3305 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
// Presumably the else branch (its opening line is not shown): both outputs
// are reset to empty views.
3310 indices = global_inds_host_view_type();
3311 values = values_host_view_type();
// Debug-only consistency checks; each throws std::logic_error on failure.
3314 #ifdef HAVE_TPETRA_DEBUG
3315 const char suffix[] =
". This should never happen. Please report this "
3316 "bug to the Tpetra developers.";
3317 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3318 (static_cast<size_t> (indices.size ()) !=
3319 static_cast<size_t> (values.size ()), std::logic_error,
3320 "At the end of this method, for global row " << globalRow <<
", "
3321 "indices.size() = " << indices.size () <<
" != values.size () = "
3322 << values.size () << suffix);
3323 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3324 (static_cast<size_t> (indices.size ()) !=
3325 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3326 "At the end of this method, for global row " << globalRow <<
", "
3327 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3328 << rowInfo.numEntries << suffix);
3329 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3330 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3331 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3332 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3333 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3334 " " << expectedNumEntries << suffix);
3335 #endif // HAVE_TPETRA_DEBUG
// Scales the stored matrix values in place by theAlpha.
// NOTE(review): the name line and signature are not visible; presumably
// CrsMatrix::scale -- confirm against the full file.
3339 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3346 const size_t nlrs = staticGraph_->getLocalNumRows ();
3347 const size_t numEntries = staticGraph_->getLocalNumEntries ();
// Special case for unallocated indices, no local rows or no local entries;
// what happens in this branch is on lines not shown in this excerpt.
3348 if (! staticGraph_->indicesAreAllocated () ||
3349 nlrs == 0 || numEntries == 0) {
// Input and output are the same device view, so the scaling is in place.
3354 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3355 KokkosBlas::scal(vals, theAlpha, vals);
// setAllToScalar (name taken from the fence label below; the signature and
// the statement that writes the values are not visible in this excerpt).
3360 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3371 const size_t numEntries = staticGraph_->getLocalNumEntries();
// Special case for unallocated indices or no local entries; what happens in
// this branch is on lines not shown.
3372 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
// Global Kokkos fence, labelled for profiling/debugging tools.
3380 Kokkos::fence(
"CrsMatrix::setAllToScalar");
// setAllValues (device-view overload): installs row pointers, column
// indices and values supplied by the caller.
// Throws std::invalid_argument when columnIndices and values differ in
// size, std::runtime_error when myGraph_ is null or when
// myGraph_->setAllIndices throws, and std::logic_error when the resulting
// local graph does not match the input extents.
3384 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3387 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3388 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3389 const typename local_matrix_device_type::values_type& values)
3392 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3393 const char tfecfFuncName[] =
"setAllValues: ";
3394 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3395 (columnIndices.size () != values.size (), std::invalid_argument,
3396 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3397 " = " << values.size () <<
".");
3398 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3399 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
// The graph stores the structure; any exception it throws is rethrown as
// std::runtime_error with the original message appended.
3402 myGraph_->setAllIndices (rowPointers, columnIndices);
3404 catch (std::exception &e) {
3405 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3406 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3407 "exception: " << e.what ());
// Sanity check that the graph's local structure matches the input extents.
3414 auto lclGraph = myGraph_->getLocalGraphDevice ();
3415 const size_t numEnt = lclGraph.entries.extent (0);
3416 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3417 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3418 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3419 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3420 "local graph. Please report this bug to the Tpetra developers.");
// The assignment that sets valuesPacked_wdv is on a line not shown; the
// unpacked storage is then made to refer to the same wrapped view.
3423 valuesUnpacked_wdv = valuesPacked_wdv;
3427 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3429 checkInternalState ();
// setAllValues overload taking a KokkosSparse local matrix (per the
// profiling label; the signature line is not visible in this excerpt).
// Extracts the row map, column entries and values from the matrix and
// forwards them to the three-view setAllValues overload.
3432 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3438 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3440 auto graph = localDeviceMatrix.graph;
3443 auto rows = graph.row_map;
3444 auto columns = graph.entries;
3445 auto values = localDeviceMatrix.values;
3447 setAllValues(rows,columns,values);
// setAllValues overload taking Teuchos::ArrayRCP inputs (row offsets,
// local column indices, values): builds Kokkos views from the arrays and
// forwards them to the view-based overload.
3450 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3454 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3455 const Teuchos::ArrayRCP<Scalar>& val)
3457 using Kokkos::Compat::getKokkosViewDeepCopy;
3458 using Teuchos::ArrayRCP;
3459 using Teuchos::av_reinterpret_cast;
3462 typedef typename local_graph_device_type::row_map_type row_map_type;
3464 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
// ptrNative is a newly allocated view in the graph's row-map type;
// ptrSizeT is an unmanaged view wrapping the caller's size_t offsets. The
// statement that fills ptrNative from ptrSizeT is on lines not shown.
3470 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3471 Kokkos::View<
const size_t*,
3472 typename row_map_type::array_layout,
3474 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3478 (ptrNative.extent (0) != ptrSizeT.extent (0),
3479 std::logic_error,
"ptrNative.extent(0) = " <<
3480 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3481 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3482 "Tpetra developers.");
// Indices and values are converted with getKokkosViewDeepCopy; the values
// are reinterpreted as IST first.
3484 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3485 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3486 this->setAllValues (ptrNative, indIn, valIn);
// getLocalDiagOffsets: fills `offsets` with the graph's local diagonal
// offsets, one per local row. Throws std::runtime_error when the matrix has
// no graph. `offsets` is grown to the local row count if it is smaller.
3489 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3494 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3495 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3496 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
3503 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
3504 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3505 offsets.resize (lclNumRows);
// When the memory space is HostSpace, the graph writes through an unmanaged
// view wrapping the caller's buffer.
3511 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3516 Kokkos::MemoryUnmanaged> output_type;
3517 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3518 staticGraph_->getLocalDiagOffsets (offsetsOut);
// Otherwise the offsets are computed into a temporary device view; the
// transfer into offsetsOut (presumably a deep copy -- TODO confirm) is on
// lines not shown in this excerpt.
3521 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3522 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3523 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3524 Kokkos::MemoryUnmanaged> output_type;
3525 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
// getLocalDiagCopy (one-argument form, per the error prefix): writes the
// matrix diagonal into `diag`. Throws std::runtime_error when the matrix
// has no graph or no column Map.
3531 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3536 using Teuchos::ArrayRCP;
3537 using Teuchos::ArrayView;
3538 using Teuchos::av_reinterpret_cast;
3539 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3544 staticGraph_.is_null (), std::runtime_error,
3545 "This method requires that the matrix have a graph.");
3546 auto rowMapPtr = this->getRowMap ();
// Special case for a null row Map or null communicator; what happens in
// this branch is on lines not shown (presumably an early return -- TODO
// confirm).
3547 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3553 auto colMapPtr = this->getColMap ();
3554 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3555 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3556 "This method requires that the matrix have a column Map.");
3557 const map_type& rowMap = * rowMapPtr;
3558 const map_type& colMap = * colMapPtr;
3559 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// Map compatibility is only verified in debug builds.
3561 #ifdef HAVE_TPETRA_DEBUG
3564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3565 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3566 "The input Vector's Map must be compatible with the CrsMatrix's row "
3567 "Map. You may check this by using Map's isCompatible method: "
3568 "diag.getMap ()->isCompatible (A.getRowMap ());");
3569 #endif // HAVE_TPETRA_DEBUG
// 1-D view of column 0 of D_lcl (defined on a line not shown), restricted
// to the first myNumRows rows.
3573 const auto D_lcl_1d =
3574 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3576 const auto lclRowMap = rowMap.getLocalMap ();
// Final argument of a call whose beginning is on lines not shown.
3581 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking diagonal offsets as an unmanaged
// Kokkos::View: copies the diagonal with KokkosSparse::getDiagCopy using
// those offsets and the local device matrix.
3584 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3589 Kokkos::MemoryUnmanaged>& offsets)
const
3591 typedef LocalOrdinal LO;
// Map compatibility is only verified in debug builds.
3593 #ifdef HAVE_TPETRA_DEBUG
3594 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3595 const map_type& rowMap = * (this->getRowMap ());
3598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3599 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3600 "The input Vector's Map must be compatible with (in the sense of Map::"
3601 "isCompatible) the CrsMatrix's row Map.");
3602 #endif // HAVE_TPETRA_DEBUG
3612 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
// Initializer of D_lcl_1d (its declaration begins on a line not shown):
// column 0 of D_lcl, restricted to the first myNumRows rows.
3615 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3617 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3618 getLocalMatrixDevice ());
// getLocalDiagCopy overload taking diagonal offsets as a
// Teuchos::ArrayView: fills the output vector's host data row by row with a
// parallel loop on the default host execution space.
3621 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3625 const Teuchos::ArrayView<const size_t>& offsets)
const
3627 using LO = LocalOrdinal;
3628 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
// Map compatibility is only verified in debug builds.
3631 #ifdef HAVE_TPETRA_DEBUG
3632 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3633 const map_type& rowMap = * (this->getRowMap ());
3636 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3637 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3638 "The input Vector's Map must be compatible with (in the sense of Map::"
3639 "isCompatible) the CrsMatrix's row Map.");
3640 #endif // HAVE_TPETRA_DEBUG
// Column 0 of lclVecHost (defined on a line not shown).
3652 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
// Unmanaged host view wrapping the caller's offsets array (no copy).
3654 using host_offsets_view_type =
3655 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3656 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3657 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3659 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3660 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3661 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3663 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3664 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
// Each row is first set to zero; when its offset is not the invalid value,
// the entry at (row start + offset) in the packed values is copied in.
3665 Kokkos::parallel_for
3666 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3667 range_type (0, myNumRows),
3668 [&, INV, h_offsets] (
const LO lclRow) {
3669 lclVecHost1d(lclRow) = STS::zero ();
3670 if (h_offsets[lclRow] != INV) {
3671 auto curRowOffset = rowPtrsPackedHost (lclRow);
3672 lclVecHost1d(lclRow) =
3673 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
// leftScale: scales the matrix using the entries of vector x.
// x's Map must be the same as the range Map or the row Map; otherwise
// std::invalid_argument is thrown. std::runtime_error is thrown on the path
// taken when the matrix is not fill complete (message partly not shown).
3680 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3685 using ::Tpetra::Details::ProfilingRegion;
3686 using Teuchos::ArrayRCP;
3687 using Teuchos::ArrayView;
3688 using Teuchos::null;
3691 using Teuchos::rcpFromRef;
3693 const char tfecfFuncName[] =
"leftScale: ";
3695 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
// xp will refer to the vector actually used for scaling.
3697 RCP<const vec_type> xp;
// x is on the range Map: when the graph has an exporter, x is first
// redistributed into a temporary vector on the row Map.
3698 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3701 auto exporter = this->getCrsGraphRef ().getExporter ();
3702 if (exporter.get () !=
nullptr) {
3703 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3704 tempVec->doImport (x, *exporter,
REPLACE);
// No exporter: x is used directly, wrapped as a non-owning RCP.
3708 xp = rcpFromRef (x);
// x is already on the row Map: use it directly.
3711 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3712 xp = rcpFromRef (x);
3715 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3716 (
true, std::invalid_argument,
"x's Map must be the same as "
3717 "either the row Map or the range Map of the CrsMatrix.");
// Fill-complete path: column 0 of xp's device data is passed to a scaling
// call whose beginning is on lines not shown.
3720 if (this->isFillComplete()) {
3721 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3722 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3725 x_lcl_1d,
false,
false);
3729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3730 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
// rightScale: scales the matrix using the entries of the Vector `x`.  The
// signature line is not part of this excerpt; the name comes from the error
// prefix and the profiling-region label below.
//
// Accepted distributions of x:
//   * x's Map is the same as the matrix's domain Map.  If the graph has an
//     Import object, x is first redistributed into a temporary Vector on the
//     column Map (doImport with REPLACE); otherwise x is used directly.
//   * x's Map is the same as the matrix's column Map: x is used directly.
// Any other Map raises std::runtime_error.
//
// The matrix must be fill complete; otherwise std::runtime_error is raised.
//
// NOTE(review): leftScale reports the analogous Map mismatch with
// std::invalid_argument, whereas this method uses std::runtime_error.  Left
// unchanged here because callers may catch the current type -- confirm which
// is intended.
// NOTE(review): the statement that consumes x_lcl_1d (and the line assigning
// tempVec to xp) are not visible in this excerpt.
3735 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3740 using ::Tpetra::Details::ProfilingRegion;
3741 using Teuchos::ArrayRCP;
3742 using Teuchos::ArrayView;
3743 using Teuchos::null;
3746 using Teuchos::rcpFromRef;
3748 const char tfecfFuncName[] =
"rightScale: ";
3750 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
// xp will point at the Vector (x itself or a redistributed copy) whose local
// entries are used below.
3752 RCP<const vec_type> xp;
3753 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
3756 auto importer = this->getCrsGraphRef ().getImporter ();
3757 if (importer.get () !=
nullptr) {
3758 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3759 tempVec->doImport (x, *importer,
REPLACE);
3763 xp = rcpFromRef (x);
3766 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
3767 xp = rcpFromRef (x);
3769 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3770 (
true, std::runtime_error,
"x's Map must be the same as "
3771 "either the domain Map or the column Map of the CrsMatrix.");
// Only a fill-complete matrix can be scaled; read x's local data on device,
// using column 0 of the local view.
3774 if (this->isFillComplete()) {
3775 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3776 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3779 x_lcl_1d,
false,
false);
3783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3784 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
// getNormInf (name taken from the kernel label below; the signature line is
// not part of this excerpt).
//
// Finds the largest entry of equilInfo.rowNorms on this process with a Kokkos
// max-reduction, then takes the maximum of that value over all processes in
// the matrix's communicator.
//
// NOTE(review): equilInfo, myMax and totalMax are declared on lines not shown
// here; equilInfo presumably holds the local row norms computed just before
// this loop -- confirm.
3789 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3794 using range_type = Kokkos::RangePolicy<execution_space, local_ordinal_type>;
// Local maximum over the row-norm array.
3795 Kokkos::parallel_reduce(
3796 "getNormInf", range_type(0, equilInfo.rowNorms.extent(0)),
3798 max = equilInfo.rowNorms(i);
3800 Kokkos::Max<mag_type>(myMax));
// Global maximum across the communicator.
3802 Teuchos::reduceAll<int, mag_type>(*(getComm()), Teuchos::REDUCE_MAX, myMax,
3803 Teuchos::outArg(totalMax));
// getNorm1 (name taken from the kernel label below; the signature line is not
// part of this excerpt).
//
// If `assumeSymmetric` is true, returns getNormInf() directly.  Otherwise
// finds the largest entry of equilInfo.colNorms on this process with a Kokkos
// max-reduction, then takes the maximum of that value over all processes in
// the matrix's communicator.
//
// NOTE(review): equilInfo, myMax and totalMax are declared on lines not shown
// here; equilInfo presumably holds the column norms computed just before this
// loop -- confirm.
3807 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3811 if (assumeSymmetric)
3812 return getNormInf();
3815 using range_type = Kokkos::RangePolicy<execution_space, local_ordinal_type>;
// Local maximum over the column-norm array.
3816 Kokkos::parallel_reduce(
3817 "getNorm1", range_type(0, equilInfo.colNorms.extent(0)),
3819 max = equilInfo.colNorms(i);
3821 Kokkos::Max<mag_type>(myMax));
// Global maximum across the communicator.
3823 Teuchos::reduceAll<int, mag_type>(*(getComm()), Teuchos::REDUCE_MAX, myMax,
3824 Teuchos::outArg(totalMax));
// Computes the square root of the sum, over all processes, of |value|^2 for
// the matrix's local entries (i.e. a Frobenius-type norm).
// NOTE(review): the signature line is not part of this excerpt; the method is
// presumably getFrobeniusNorm -- confirm.  mySum, totalSum, STS and STM are
// declared on lines not shown here.
//
// Local accumulation is skipped when this process has no entries.  Two paths:
//   * storage optimized: a single pass over the packed values array;
//   * otherwise: a row-by-row pass using each row's RowInfo, visiting only
//     rowInfo.numEntries values per row.
3828 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3833 using Teuchos::ArrayView;
3834 using Teuchos::outArg;
3835 using Teuchos::REDUCE_SUM;
3836 using Teuchos::reduceAll;
3844 if (getLocalNumEntries() > 0) {
3845 if (isStorageOptimized ()) {
3848 const size_t numEntries = getLocalNumEntries ();
3849 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3850 for (
size_t k = 0; k < numEntries; ++k) {
3851 auto val = values[k];
3855 const mag_type val_abs = STS::abs (val);
3856 mySum += val_abs * val_abs;
3860 const LocalOrdinal numRows =
3861 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3862 for (LocalOrdinal r = 0; r < numRows; ++r) {
3863 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3864 const size_t numEntries = rowInfo.numEntries;
3865 auto A_r = this->getValuesViewHost(rowInfo);
3866 for (
size_t k = 0; k < numEntries; ++k) {
3868 const mag_type val_abs = STS::abs (val);
3869 mySum += val_abs * val_abs;
// Sum the per-process partial sums, then take the square root.
3875 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3876 mySum, outArg (totalSum));
3877 return STM::sqrt (totalSum);
// replaceColMap: forwards `newColMap` to the graph owned by this matrix
// (myGraph_->replaceColMap).  The signature line is not part of this excerpt.
//
// Throws std::runtime_error if the matrix does not own its graph (myGraph_ is
// null), because a const graph may not be modified.
3880 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3885 const char tfecfFuncName[] =
"replaceColMap: ";
3889 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3890 myGraph_.is_null (), std::runtime_error,
3891 "This method does not work if the matrix has a const graph. The whole "
3892 "idea of a const graph is that you are not allowed to change it, but "
3893 "this method necessarily must modify the graph, since the graph owns "
3894 "the matrix's column Map.");
3895 myGraph_->replaceColMap (newColMap);
// reindexColumns (name taken from the error prefix; the line declaring the
// method name and the `graph` parameter is not part of this excerpt).
//
// Parameters visible here:
//   newColMap   - column Map to reindex against.
//   newImport   - Import object associated with newColMap.
//   sortEachRow - if true, and the graph is locally indexed and not already
//                 sorted, each row's column indices are sorted together with
//                 the corresponding values.
//
// Operates on `*graph` when `graph` is non-null, otherwise on the matrix's
// own graph.  Throws std::invalid_argument if `graph` is null and the matrix
// does not own its graph.
//
// NOTE(review): the call that performs the graph reindexing (which presumably
// receives sortGraph = false so that sorting is done here, alongside the
// values) is on a line not shown -- confirm.
3898 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3902 const Teuchos::RCP<const map_type>& newColMap,
3903 const Teuchos::RCP<const import_type>& newImport,
3904 const bool sortEachRow)
3906 const char tfecfFuncName[] =
"reindexColumns: ";
3907 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3908 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3909 "The input graph is null, but the matrix does not own its graph.");
3911 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
3912 const bool sortGraph =
false;
// Sort indices and values together, one row at a time, then record on the
// graph that its indices are sorted.
3916 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3917 const LocalOrdinal lclNumRows =
3918 static_cast<LocalOrdinal
> (theGraph.getLocalNumRows ());
3920 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3922 const RowInfo rowInfo = theGraph.getRowInfo (row);
3923 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
3924 auto vals = this->getValuesViewHostNonConst (rowInfo);
3926 sort2 (lclColInds.data (),
3927 lclColInds.data () + rowInfo.numEntries,
3930 theGraph.indicesAreSorted_ =
true;
// replaceDomainMap: forwards `newDomainMap` to the graph owned by this matrix
// (myGraph_->replaceDomainMap).  The signature line is not part of this
// excerpt.
//
// Throws std::runtime_error if the matrix does not own its graph (myGraph_ is
// null), because a const graph may not be modified.
3934 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3939 const char tfecfFuncName[] =
"replaceDomainMap: ";
3940 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3941 myGraph_.is_null (), std::runtime_error,
3942 "This method does not work if the matrix has a const graph. The whole "
3943 "idea of a const graph is that you are not allowed to change it, but this"
3944 " method necessarily must modify the graph, since the graph owns the "
3945 "matrix's domain Map and Import objects.");
3946 myGraph_->replaceDomainMap (newDomainMap);
// replaceDomainMapAndImporter: forwards `newDomainMap` and `newImporter` to
// the graph owned by this matrix (myGraph_->replaceDomainMapAndImporter).
// The line declaring the method name and first parameter is not part of this
// excerpt.
//
// Throws std::runtime_error if the matrix does not own its graph (myGraph_ is
// null), because a const graph may not be modified.
3949 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3953 Teuchos::RCP<const import_type>& newImporter)
3955 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3956 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3957 myGraph_.is_null (), std::runtime_error,
3958 "This method does not work if the matrix has a const graph. The whole "
3959 "idea of a const graph is that you are not allowed to change it, but this"
3960 " method necessarily must modify the graph, since the graph owns the "
3961 "matrix's domain Map and Import objects.");
3962 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
// replaceRangeMap: forwards `newRangeMap` to the graph owned by this matrix
// (myGraph_->replaceRangeMap).  The signature line is not part of this
// excerpt.
//
// Throws std::runtime_error if the matrix does not own its graph (myGraph_ is
// null), because a const graph may not be modified.  The message names the
// range Map and Export objects, which are what this method replaces (it
// previously repeated the domain Map / Import wording of replaceDomainMap).
3965 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3970 const char tfecfFuncName[] =
"replaceRangeMap: ";
3971 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3972 myGraph_.is_null (), std::runtime_error,
3973 "This method does not work if the matrix has a const graph. The whole "
3974 "idea of a const graph is that you are not allowed to change it, but this"
3975 " method necessarily must modify the graph, since the graph owns the "
3976 "matrix's range Map and Export objects.");
3977 myGraph_->replaceRangeMap (newRangeMap);
// replaceRangeMapAndExporter: forwards `newRangeMap` and `newExporter` to the
// graph owned by this matrix (myGraph_->replaceRangeMapAndExporter).  The line
// declaring the method name and first parameter is not part of this excerpt.
//
// Throws std::runtime_error if the matrix does not own its graph (myGraph_ is
// null), because a const graph may not be modified.  The message names the
// range Map and Export objects, which are what this method replaces (it
// previously repeated the domain Map / Import wording of the domain-Map
// variants).
3980 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3984 Teuchos::RCP<const export_type>& newExporter)
3986 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
3987 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3988 myGraph_.is_null (), std::runtime_error,
3989 "This method does not work if the matrix has a const graph. The whole "
3990 "idea of a const graph is that you are not allowed to change it, but this"
3991 " method necessarily must modify the graph, since the graph owns the "
3992 "matrix's range Map and Export objects.");
3993 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
// Appends (column index, value) pairs to the pending "nonlocal" storage for a
// global row: nonlocals_[globalRow] is created on first use, and the k-th
// index and k-th value are pushed onto its two parallel arrays.
//
// NOTE(review): the line declaring the method name and the `globalRow`
// parameter is not part of this excerpt; presumably this is the helper used by
// insertGlobalValues for rows not owned by the calling process -- confirm.
//
// Parameters visible here:
//   indices - global column indices to append.
//   values  - values to append; indexed with the same k as `indices`, so it is
//             read for indices.size() entries.
3996 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4000 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4001 const Teuchos::ArrayView<const Scalar>& values)
4003 using Teuchos::Array;
4004 typedef GlobalOrdinal GO;
4005 typedef typename Array<GO>::size_type size_type;
4007 const size_type numToInsert = indices.size ();
4010 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4011 Array<GO>& curRowInds = curRow.first;
4012 Array<Scalar>& curRowVals = curRow.second;
// Reserve once up front so the appends below do not reallocate repeatedly.
4013 const size_type newCapacity = curRowInds.size () + numToInsert;
4014 curRowInds.reserve (newCapacity);
4015 curRowVals.reserve (newCapacity);
4016 for (size_type k = 0; k < numToInsert; ++k) {
4017 curRowInds.push_back (indices[k]);
4018 curRowVals.push_back (values[k]);
// globalAssemble (name taken from the error prefix and profiling label; the
// signature line is not part of this excerpt).
//
// Communicates the entries stored in nonlocals_ (rows inserted on this
// process that it does not own) to the processes that own those rows, adding
// them into this matrix.  Steps visible in this excerpt:
//   1. Require that fill is active (std::runtime_error otherwise).
//   2. All-reduce (MAX) whether any process has nonlocal rows.
//   3. Build a Map of this process's nonlocal global rows.  Each row's
//      (column, value) arrays are sorted with sort2 and compacted with merge2,
//      and the resulting per-row entry count is recorded.  The Map's index
//      base is the global minimum of the nonlocal row indices.
//   4. Create a temporary "nonlocal" matrix on that Map and insert the rows.
//   5. If the original row Map is one-to-one, Export (ADD) the nonlocal matrix
//      directly into this matrix.  Otherwise Export into a temporary matrix on
//      a one-to-one row Map, free the nonlocal matrix, and Import (ADD) from
//      the one-to-one matrix into this matrix.
//   6. Release nonlocals_ by swapping it with an empty container.
//   7. All-reduce (MIN) the isLocallyComplete flag and throw
//      std::runtime_error if it is not 1 everywhere, i.e. some inserted row is
//      not in the row Map on any process.
//
// When Behavior::verbose("CrsMatrix") is true, progress messages are written
// to std::cerr.
//
// NOTE(review): several lines are not visible here, including the early exit
// when no process has nonlocal rows, the conditions guarding the
// `isLocallyComplete = 0` assignments, and the construction of
// oneToOneRowMap.  merge2 presumably combines duplicate column entries --
// confirm against its definition.
4022 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4029 using Teuchos::Comm;
4030 using Teuchos::outArg;
4033 using Teuchos::REDUCE_MAX;
4034 using Teuchos::REDUCE_MIN;
4035 using Teuchos::reduceAll;
4039 typedef GlobalOrdinal GO;
4040 typedef typename Teuchos::Array<GO>::size_type size_type;
4041 const char tfecfFuncName[] =
"globalAssemble: ";
4042 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
4044 const bool verbose = Behavior::verbose(
"CrsMatrix");
4045 std::unique_ptr<std::string> prefix;
4047 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4048 std::ostringstream os;
4049 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4051 std::cerr << os.str();
4053 RCP<const Comm<int> > comm = getComm ();
4055 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4056 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4057 "you may call this method.");
4059 const size_t myNumNonlocalRows = nonlocals_.size ();
// Step 2: does any process have nonlocal rows to communicate?
4066 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4067 int someoneHasNonlocalRows = 0;
4068 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4069 outArg (someoneHasNonlocalRows));
4070 if (someoneHasNonlocalRows == 0) {
// Step 3: build the Map of nonlocal rows and the per-row entry counts.
4084 RCP<const map_type> nonlocalRowMap;
4085 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4087 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4088 size_type curPos = 0;
4089 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4090 ++mapIter, ++curPos) {
4091 myNonlocalGblRows[curPos] = mapIter->first;
4094 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4095 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4102 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4103 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4104 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4105 merge2 (gblCols_newEnd, vals_newEnd,
4106 gblCols.begin (), gblCols.end (),
4107 vals.begin (), vals.end ());
4108 gblCols.erase (gblCols_newEnd, gblCols.end ());
4109 vals.erase (vals_newEnd, vals.end ());
4110 numEntPerNonlocalRow[curPos] = gblCols.size ();
// The Map's index base is the smallest nonlocal global row index over all
// processes; a process with no nonlocal rows contributes numeric_limits max.
4121 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4123 auto iter = std::min_element (myNonlocalGblRows.begin (),
4124 myNonlocalGblRows.end ());
4125 if (iter != myNonlocalGblRows.end ()) {
4126 myMinNonlocalGblRow = *iter;
4129 GO gblMinNonlocalGblRow = 0;
4130 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4131 outArg (gblMinNonlocalGblRow));
4132 const GO indexBase = gblMinNonlocalGblRow;
4133 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4134 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4143 std::ostringstream os;
4144 os << *prefix <<
"Create nonlocal matrix" << endl;
4145 std::cerr << os.str();
// Step 4: temporary matrix holding only the nonlocal rows.
4147 RCP<crs_matrix_type> nonlocalMatrix =
4148 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4150 size_type curPos = 0;
4151 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4152 ++mapIter, ++curPos) {
4153 const GO gblRow = mapIter->first;
4155 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4156 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4158 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
// Step 5: move the nonlocal entries to their owning processes.
4170 auto origRowMap = this->getRowMap ();
4171 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4173 int isLocallyComplete = 1;
4175 if (origRowMapIsOneToOne) {
4177 std::ostringstream os;
4178 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4179 std::cerr << os.str();
4181 export_type exportToOrig (nonlocalRowMap, origRowMap);
4183 isLocallyComplete = 0;
4186 std::ostringstream os;
4187 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4188 std::cerr << os.str();
4190 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4195 std::ostringstream os;
4196 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4197 std::cerr << os.str();
4204 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4206 isLocallyComplete = 0;
4214 std::ostringstream os;
4215 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4217 std::cerr << os.str();
4219 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4221 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4227 std::ostringstream os;
4228 os << *prefix <<
"Free nonlocalMatrix" << endl;
4229 std::cerr << os.str();
4231 nonlocalMatrix = Teuchos::null;
4235 std::ostringstream os;
4236 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4237 std::cerr << os.str();
4239 import_type importToOrig (oneToOneRowMap, origRowMap);
4240 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4248 std::ostringstream os;
4249 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4250 std::cerr << os.str();
// Step 6: swapping with an empty container releases the stored entries.
4252 decltype (nonlocals_) newNonlocals;
4253 std::swap (nonlocals_, newNonlocals);
// Step 7: every process must have found owners for all of its nonlocal rows.
4262 int isGloballyComplete = 0;
4263 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4264 outArg (isGloballyComplete));
4265 TEUCHOS_TEST_FOR_EXCEPTION
4266 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4267 "you called insertGlobalValues with a global row index which is not in "
4268 "the matrix's row Map on any process in its communicator.");
// Returns the matrix to the "fill active" state:
//   * if the matrix owns its graph (not a static graph), forwards `params` to
//     the graph's resumeFill;
//   * resets applyHelper, so it is rebuilt on next use;
//   * clears the fillComplete_ flag.
// NOTE(review): the signature line is not part of this excerpt; presumably
// this is CrsMatrix::resumeFill(params) -- confirm.  Other state changes may
// occur on lines not shown.
4271 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4276 if (! isStaticGraph ()) {
4277 myGraph_->resumeFill (params);
4280 applyHelper.reset();
4281 fillComplete_ =
false;
// Returns whatever the matrix's graph reports from haveGlobalConstants().
// NOTE(review): the signature line is not part of this excerpt; presumably
// this is CrsMatrix::haveGlobalConstants() -- confirm.
4284 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4288 return getCrsGraphRef ().haveGlobalConstants ();
// fillComplete(params): convenience overload that uses the graph's row Map as
// both the range Map and the domain Map, then delegates to
// fillComplete(domainMap, rangeMap, params).  The signature line is not part
// of this excerpt.
//
// Throws std::logic_error if getCrsGraph() returns null.
// NOTE(review): `graph` is declared on a line not shown here.
4291 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4296 const char tfecfFuncName[] =
"fillComplete(params): ";
4298 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4299 (this->getCrsGraph ().is_null (), std::logic_error,
4300 "getCrsGraph() returns null. This should not happen at this point. "
4301 "Please report this bug to the Tpetra developers.");
4311 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4312 Teuchos::RCP<const map_type> domainMap = rangeMap;
4313 this->fillComplete (domainMap, rangeMap, params);
// fillComplete(domainMap, rangeMap, params) (name taken from the error prefix;
// the line declaring the method name and `domainMap` is not part of this
// excerpt).
//
// Parameters read from `params` when it is non-null:
//   "No Nonlocal Changes"         - if true, skip the global assembly step.
//   "sort column map ghost gids"  - (or "Sort column Map ghost GIDs") stored on
//                                   the owned graph as
//                                   sortGhostsAssociatedWithEachProcessor_.
//   "compute global constants"    - default true; controls whether the graph's
//                                   computeGlobalConstants() is called.
//
// Throws std::runtime_error if fill is not active or the matrix is already
// fill complete, if nonlocal entries exist on a single-process run, or if
// makeIndicesLocal reports an error.  In HAVE_TPETRA_DEBUG builds with a static
// graph, also throws if the given domain/range Maps differ from the graph's.
//
// Outline of the visible steps:
//   1. Allocate values if the graph's indices are not yet allocated (local
//      indices when a column Map exists, global indices otherwise).
//   2. Call globalAssemble() when more than one process is involved and
//      nonlocal inserts were not ruled out.
//   3. Static graph: fill the local matrix only.
//      Owned graph: set domain/range Maps, build the column Map if needed,
//      convert indices to local, sort and merge indices with values, build
//      Import/Export, fill the local graph and matrix, compute constants, and
//      mark the graph fill complete.
//   4. Mark the matrix fill complete and run checkInternalState().
//
// NOTE(review): the two profiling labels below spell the method name
// "fillCompete" (missing 'l').  Left unchanged because external profiling
// tools may match on the label -- confirm whether it should be corrected.
// NOTE(review): lines that open/close some branches (e.g. the `else` of the
// static-graph test) are not visible in this excerpt.
4317 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4321 const Teuchos::RCP<const map_type>& rangeMap,
4322 const Teuchos::RCP<Teuchos::ParameterList>& params)
4326 using Teuchos::ArrayRCP;
4330 const char tfecfFuncName[] =
"fillComplete: ";
4331 ProfilingRegion regionFillComplete
4332 (
"Tpetra::CrsMatrix::fillComplete");
4333 const bool verbose = Behavior::verbose(
"CrsMatrix");
4334 std::unique_ptr<std::string> prefix;
4336 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4337 std::ostringstream os;
4338 os << *prefix << endl;
4339 std::cerr << os.str ();
4342 "Tpetra::CrsMatrix::fillCompete",
4345 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4346 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4347 "Matrix fill state must be active (isFillActive() "
4348 "must be true) before you may call fillComplete().");
4349 const int numProcs = this->getComm ()->getSize ();
// Defaults, possibly overridden by `params` below.
4359 bool assertNoNonlocalInserts =
false;
4362 bool sortGhosts =
true;
4364 if (! params.is_null ()) {
4365 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4366 assertNoNonlocalInserts);
4367 if (params->isParameter (
"sort column map ghost gids")) {
4368 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4370 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4371 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
// Global assembly is only needed with more than one process and when the
// caller has not promised that there are no nonlocal inserts.
4376 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4378 if (! this->myGraph_.is_null ()) {
4379 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4382 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4383 if (this->hasColMap ()) {
4384 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4387 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4392 if (needGlobalAssemble) {
4393 this->globalAssemble ();
4396 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4397 (numProcs == 1 && nonlocals_.size() > 0,
4398 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4399 "An invalid entry (i.e., with row index not in the row Map) must have "
4400 "been submitted to the CrsMatrix.");
// Static (const) graph: the graph cannot be modified, so only the matrix's
// local data is filled.
4403 if (this->isStaticGraph ()) {
4411 #ifdef HAVE_TPETRA_DEBUG
4429 const bool domainMapsMatch =
4430 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4431 const bool rangeMapsMatch =
4432 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4434 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4435 (! domainMapsMatch, std::runtime_error,
4436 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4437 "The graph cannot be changed because it was given to the CrsMatrix "
4438 "constructor as const. You can fix this by passing in the graph's "
4439 "domain Map and range Map to the matrix's fillComplete call.");
4441 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4442 (! rangeMapsMatch, std::runtime_error,
4443 "The CrsMatrix's range Map does not match the graph's range Map. "
4444 "The graph cannot be changed because it was given to the CrsMatrix "
4445 "constructor as const. You can fix this by passing in the graph's "
4446 "domain Map and range Map to the matrix's fillComplete call.");
4447 #endif // HAVE_TPETRA_DEBUG
4451 this->fillLocalMatrix (params);
// Owned graph: finish the graph together with the matrix.
4459 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4462 Teuchos::Array<int> remotePIDs (0);
4463 const bool mustBuildColMap = ! this->hasColMap ();
4464 if (mustBuildColMap) {
4465 this->myGraph_->makeColMap (remotePIDs);
4470 const std::pair<size_t, std::string> makeIndicesLocalResult =
4471 this->myGraph_->makeIndicesLocal(verbose);
4476 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4477 (makeIndicesLocalResult.first != 0, std::runtime_error,
4478 makeIndicesLocalResult.second);
4480 const bool sorted = this->myGraph_->isSorted ();
4481 const bool merged = this->myGraph_->isMerged ();
4482 this->sortAndMergeIndicesAndValues (sorted, merged);
4487 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4491 this->fillLocalGraphAndMatrix (params);
4493 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4494 params->get (
"compute global constants",
true);
4495 if (callGraphComputeGlobalConstants) {
4496 this->myGraph_->computeGlobalConstants ();
4499 this->myGraph_->computeLocalConstants ();
4501 this->myGraph_->fillComplete_ =
true;
4502 this->myGraph_->checkInternalState ();
4507 this->fillComplete_ =
true;
4510 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4512 this->checkInternalState ();
// expertStaticFillComplete (name taken from the error prefix; the line
// declaring the method name and `domainMap` is not part of this excerpt).
//
// Completes fill using caller-supplied Maps and communication objects:
//   domainMap / rangeMap - forwarded to the graph.
//   importer / exporter  - forwarded to the graph.
//   params               - forwarded to the graph and to
//                          fillLocalGraphAndMatrix; with
//                          HAVE_TPETRA_MMM_TIMINGS its "Timer Label" entry is
//                          used to prefix timer names.
//
// Throws std::runtime_error if fill is not active or the matrix is already
// fill complete, and std::logic_error if the matrix does not own its graph.
// In HAVE_TPETRA_DEBUG builds the final state is verified: fill must no longer
// be active and the matrix must report fill complete.
//
// Steps: graph expertStaticFillComplete -> fillLocalGraphAndMatrix -> set
// fillComplete_ -> checkInternalState.
//
// Fixes relative to the previous text of this block:
//   * the `! isFillComplete()` debug check used the message of the
//     isFillActive() check; it now reports "isFillComplete() is false";
//   * the `params` parameter declaration had a mis-encoded character in place
//     of "&p"; it is restored to `&params`.
4516 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4520 const Teuchos::RCP<const map_type> & rangeMap,
4521 const Teuchos::RCP<const import_type>& importer,
4522 const Teuchos::RCP<const export_type>& exporter,
4523 const Teuchos::RCP<Teuchos::ParameterList> &params)
4525 #ifdef HAVE_TPETRA_MMM_TIMINGS
4527 if(!params.is_null())
4528 label = params->get(
"Timer Label",label);
4529 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4530 using Teuchos::TimeMonitor;
4532 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4535 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4536 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4537 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4538 "must be true) before calling fillComplete().");
4539 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4540 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4543 #ifdef HAVE_TPETRA_MMM_TIMINGS
4544 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
// Complete the graph first; the matrix's local data is built from it.
4547 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4551 #ifdef HAVE_TPETRA_MMM_TIMINGS
4552 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4555 fillLocalGraphAndMatrix (params);
4560 fillComplete_ =
true;
4563 #ifdef HAVE_TPETRA_DEBUG
4564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4565 ": We're at the end of fillComplete(), but isFillActive() is true. "
4566 "Please report this bug to the Tpetra developers.");
4567 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4568 ": We're at the end of fillComplete(), but isFillComplete() is false. "
4569 "Please report this bug to the Tpetra developers.");
4570 #endif // HAVE_TPETRA_DEBUG
4572 #ifdef HAVE_TPETRA_MMM_TIMINGS
4573 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4576 checkInternalState();
// Walks one row's column-index array [cols, cols + rowLen) with a read cursor
// (`cur`) and a write cursor (`newend`), comparing each index with the last
// kept one, and returns `newend - beg`.
// NOTE(review): the signature line and the statements inside the loop are not
// part of this excerpt.  Judging by the caller in sortAndMergeIndicesAndValues,
// which stores the return value as the row's new length, this is
// mergeRowIndicesAndValues and the hidden lines combine the values of equal
// adjacent indices -- confirm.
4580 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4586 LocalOrdinal* beg = cols;
4587 LocalOrdinal* end = cols + rowLen;
4588 LocalOrdinal* newend = beg;
4590 LocalOrdinal* cur = beg + 1;
4594 while (cur != end) {
4595 if (*cur != *newend) {
4612 return newend - beg;
// sortAndMergeIndicesAndValues (name taken from the error prefix; the
// signature line is not part of this excerpt).
//
// When the graph is not already both sorted and merged, processes every local
// row on the host: sorts the row's column indices together with its values
// (sort2), merges the row (mergeRowIndicesAndValues), stores the new row
// length, and accumulates the number of removed entries.
//
// Preconditions checked when work is needed:
//   * the matrix must own its graph (std::runtime_error for a static graph);
//   * myGraph_ must be non-null (std::logic_error);
//   * storage must not already be optimized (std::logic_error).
//
// NOTE(review): rowBegins_, rowLengths_, cols_ and `vals` are set up on lines
// not shown here, as is whatever is done with totalNumDups after the loop.
4615 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4620 using ::Tpetra::Details::ProfilingRegion;
4621 typedef LocalOrdinal LO;
4622 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4623 host_execution_space;
4624 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4625 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4626 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4628 if (! sorted || ! merged) {
4629 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4630 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4631 "\"static\" (const) graph, since the matrix does not own the graph.");
4632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4633 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4634 "this matrix claims ! isStaticGraph(). "
4635 "Please report this bug to the Tpetra developers.");
4636 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4637 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4638 "this method if the graph's storage has already been optimized. "
4639 "Please report this bug to the Tpetra developers.");
4642 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4643 size_t totalNumDups = 0;
4648 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
// Per-row sort + merge on the host; the reduction counts removed duplicates.
4650 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4651 [=] (
const LO lclRow,
size_t& numDups) {
4652 size_t rowBegin = rowBegins_(lclRow);
4653 size_t rowLen = rowLengths_(lclRow);
4654 LO* cols = cols_.data() + rowBegin;
4657 sort2 (cols, cols + rowLen, vals);
4660 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4661 rowLengths_(lclRow) = newRowLength;
4662 numDups += rowLen - newRowLength;
// Non-transpose sparse matrix-multivector product, Y := beta*Y + alpha*A*X,
// including the communication around the local kernel.
// NOTE(review): the signature line is not part of this excerpt; this is
// presumably applyNonTranspose(X_in, Y_in, alpha, beta), the routine that
// apply() calls for NO_TRANS -- confirm.  Many statements (early returns,
// branch bodies, the Export and reduce calls) are on lines not shown.
//
// Visible structure:
//   * alpha == 0 and beta != 1 are handled as special cases up front.
//   * X is brought to the column Map: with no Import object either X_in is
//     used directly or it is copied into a column-Map multivector; with an
//     Import object, doImport (INSERT) fills a column-Map multivector.
//   * With an Export object, the local product is computed into a row-Map
//     multivector with beta = 0 and then exported; without one, the local
//     product is applied either to a row-Map copy of Y_in or to Y_in itself.
//   * If Y is replicated (not distributed while running on more than one
//     process), a final reduction of Y is performed.
4675 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4686 using Teuchos::rcp_const_cast;
4687 using Teuchos::rcpFromRef;
4688 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4689 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4695 if (alpha == ZERO) {
4698 }
else if (beta != ONE) {
4712 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4713 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
// beta == 0 means Y's previous contents do not contribute to the result.
4719 const bool Y_is_overwritten = (beta ==
ZERO);
4722 const bool Y_is_replicated =
4723 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4731 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4738 RCP<const MV> X_colMap;
4739 if (importer.is_null ()) {
4747 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4749 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4754 X_colMap = rcpFromRef (X_in);
4758 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4764 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4767 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4768 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4775 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
4782 if (! exporter.is_null ()) {
4783 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4785 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4791 if (Y_is_overwritten) {
4817 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4824 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4828 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4836 if (Y_is_replicated) {
4837 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
// Transpose / conjugate-transpose sparse matrix-multivector product, including
// the communication around the local kernel.
// NOTE(review): the line declaring the method name and the X_in / Y_in
// parameters is not part of this excerpt; this is presumably
// applyTranspose(X_in, Y_in, mode, alpha, beta), the routine apply() calls for
// modes other than NO_TRANS -- confirm.  Many statements (early returns,
// branch bodies, the Export and reduce calls) are on lines not shown.
//
// Visible structure:
//   * alpha == 0 is handled as a special case up front.
//   * X is either a deep copy of X_in or a reference to it.
//   * The cached importMV_ / exportMV_ multivectors are (re)created on the
//     column Map / row Map when an Import / Export object exists and the cache
//     is empty; a cached multivector whose column count differs from
//     numVectors is tested for first.
//   * With an Export object, X_in is brought into exportMV_ via doImport
//     (INSERT) using that Export.
//   * With an Import object, importMV_ is zeroed, the local product is
//     computed into it with beta = 0, and the result is then communicated to
//     Y; without one, the local product is applied to a copy of Y_in or to
//     Y_in directly.
//   * If Y is replicated (not distributed while running on more than one
//     process), a final reduction of Y is performed.
4842 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4847 const Teuchos::ETransp mode,
4852 using Teuchos::null;
4855 using Teuchos::rcp_const_cast;
4856 using Teuchos::rcpFromRef;
4857 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4860 if (alpha == ZERO) {
4883 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4884 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4889 const bool Y_is_replicated = (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
// beta == 0 means Y's previous contents do not contribute to the result.
4890 const bool Y_is_overwritten = (beta ==
ZERO);
4891 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4897 X = rcp (
new MV (X_in, Teuchos::Copy));
4899 X = rcpFromRef (X_in);
4903 if (importer != Teuchos::null) {
4904 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4907 if (importMV_ == null) {
4908 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
4911 if (exporter != Teuchos::null) {
4912 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4915 if (exportMV_ == null) {
4916 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
4922 if (! exporter.is_null ()) {
4923 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4924 exportMV_->doImport (X_in, *exporter,
INSERT);
4931 if (importer != Teuchos::null) {
4932 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
4939 importMV_->putScalar (ZERO);
4941 this->localApply (*X, *importMV_, mode, alpha, ZERO);
4943 if (Y_is_overwritten) {
4960 MV Y (Y_in, Teuchos::Copy);
4961 this->localApply (*X, Y, mode, alpha, beta);
4964 this->localApply (*X, Y_in, mode, alpha, beta);
4971 if (Y_is_replicated) {
4972 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
// localApply (name taken from the error prefix and profiling label; the line
// declaring the method name and the X / Y parameters is not part of this
// excerpt).
//
// Performs the on-process sparse matrix-multivector product for the requested
// `mode` (NO_TRANS, TRANS or CONJ_TRANS) with scalars `alpha` and `beta`; no
// communication is visible in this routine.
//
// Checks (all std::runtime_error):
//   * local row counts of X and Y against the column Map / row Map, with the
//     roles of the two Maps swapped in the transpose cases;
//   * the matrix must be fill complete;
//   * X and Y must be constant stride;
//   * X and Y must not alias (same non-null data pointer with nonzero extent).
// An unrecognized `mode` raises std::invalid_argument.
//
// applyHelper is created lazily on first use.  When
// KOKKOSKERNELS_ENABLE_TPL_CUSPARSE is defined and the execution space is
// Kokkos::Cuda, a row-imbalance measure (max row length minus average row
// length) is computed and can select the merge-path SpMV algorithm.  If
// applyHelper->shouldUseIntRowptrs() is true, a copy of the local matrix with
// int row pointers and the corresponding handle are used instead.
//
// NOTE(review): in the two "TRANS or CONJ_TRANS case" checks below, both
// messages begin "X has the wrong number of local"; the second compares
// against the column Map, which by symmetry with the NO_TRANS checks looks
// like the Y check.  The condition line is not visible here, so the message
// is left unchanged -- confirm and correct if it is indeed about Y.
// NOTE(review): the threshold that turns the imbalance into
// useMergePath = true, and the kernel call itself, are on lines not shown.
4977 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4982 const Teuchos::ETransp mode,
4983 const Scalar& alpha,
4984 const Scalar& beta)
const
4987 using Teuchos::NO_TRANS;
4988 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
4995 const char tfecfFuncName[] =
"localApply: ";
4996 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5000 const bool transpose = (mode != Teuchos::NO_TRANS);
5001 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5003 getColMap ()->getLocalNumElements (), std::runtime_error,
5004 "NO_TRANS case: X has the wrong number of local rows. "
5006 "getColMap()->getLocalNumElements() = " <<
5007 getColMap ()->getLocalNumElements () <<
".");
5008 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5010 getRowMap ()->getLocalNumElements (), std::runtime_error,
5011 "NO_TRANS case: Y has the wrong number of local rows. "
5013 "getRowMap()->getLocalNumElements() = " <<
5014 getRowMap ()->getLocalNumElements () <<
".");
5015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5017 getRowMap ()->getLocalNumElements (), std::runtime_error,
5018 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5020 <<
" != getRowMap()->getLocalNumElements() = "
5021 << getRowMap ()->getLocalNumElements () <<
".");
5022 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5024 getColMap ()->getLocalNumElements (), std::runtime_error,
5025 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5027 <<
" != getColMap()->getLocalNumElements() = "
5028 << getColMap ()->getLocalNumElements () <<
".");
5029 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5030 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5031 "fill complete. You must call fillComplete() (possibly with "
5032 "domain and range Map arguments) without an intervening "
5033 "resumeFill() call before you may call this method.");
5034 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5036 std::runtime_error,
"X and Y must be constant stride.");
5041 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5042 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5043 && X_lcl.extent(0) != 0,
5044 std::runtime_error,
"X and Y may not alias one another.");
5047 auto A_lcl = getLocalMatrixDevice();
// Build the SpMV helper once; it is reused until something resets it.
5049 if(!applyHelper.get()) {
5052 bool useMergePath =
false;
5053 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
5059 if constexpr(std::is_same_v<execution_space, Kokkos::Cuda>) {
5060 LocalOrdinal nrows = getLocalNumRows();
5061 LocalOrdinal maxRowImbalance = 0;
5063 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5066 useMergePath =
true;
5069 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
5070 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
// Translate the Teuchos transpose mode into the KokkosSparse mode string.
5074 const char* modeKK =
nullptr;
5077 case Teuchos::NO_TRANS:
5078 modeKK = KokkosSparse::NoTranspose;
break;
5079 case Teuchos::TRANS:
5080 modeKK = KokkosSparse::Transpose;
break;
5081 case Teuchos::CONJ_TRANS:
5082 modeKK = KokkosSparse::ConjugateTranspose;
break;
5084 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
5087 if(applyHelper->shouldUseIntRowptrs())
5089 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5091 &applyHelper->handle_int, modeKK,
5097 &applyHelper->handle, modeKK,
// apply (name taken from fnName below; the line declaring the method name and
// the X / Y parameters is not part of this excerpt).
//
// Public entry point for the matrix-multivector product.  Requires the matrix
// to be fill complete (std::runtime_error otherwise), then dispatches on
// `mode`: NO_TRANS goes to applyNonTranspose(X, Y, alpha, beta); every other
// mode goes to applyTranspose(X, Y, mode, alpha, beta).  Each path is wrapped
// in its own profiling region.
5102 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5107 Teuchos::ETransp mode,
5112 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5114 TEUCHOS_TEST_FOR_EXCEPTION
5115 (! isFillComplete (), std::runtime_error,
5116 fnName <<
": Cannot call apply() until fillComplete() "
5117 "has been called.");
5119 if (mode == Teuchos::NO_TRANS) {
5120 ProfilingRegion regionNonTranspose (fnName);
5121 this->applyNonTranspose (X, Y, alpha, beta);
5124 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5125 this->applyTranspose (X, Y, mode, alpha, beta);
// convert (name taken from the error prefix; the inner template header that
// introduces `T` and the line with the method name are not part of this
// excerpt).
//
// Returns a new CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> that is built
// on this matrix's graph (getCrsGraph()), whose values are produced from this
// matrix's device values by copyConvert, and which is fill-completed with this
// matrix's domain and range Maps.
//
// Throws std::runtime_error if this (source) matrix is not fill complete.
// NOTE(review): output_matrix_type is defined on a line not shown; presumably
// it is the return type's pointee -- confirm.
5130 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5132 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5138 const char tfecfFuncName[] =
"convert: ";
5140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5141 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5142 "of the conversion) is not fill complete. You must first call "
5143 "fillComplete() (possibly with the domain and range Map) without an "
5144 "intervening call to resumeFill(), before you may call this method.");
5146 RCP<output_matrix_type> newMatrix
5147 (
new output_matrix_type (this->getCrsGraph ()));
// Copy the values with element-wise conversion; arguments are (destination,
// source).
5151 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5152 this->getLocalMatrixDevice ().values);
5156 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
// CrsMatrix::checkInternalState (name from the tfecfFuncName string below).
// Runs a series of consistency checks on the matrix/graph state and throws
// std::logic_error with a "please report this bug" message when one fails.
5162 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5169 const char tfecfFuncName[] =
"checkInternalState: ";
5170 const char err[] =
"Internal state is not consistent.  "
5171 "Please report this bug to the Tpetra developers.";
// Check 1: the matrix must always have a (static) graph.
5175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5176 (staticGraph_.is_null (), std::logic_error, err);
// Check 2: if myGraph_ is set, it must be the same object as staticGraph_.
5180 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5181 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5182 std::logic_error, err);
// Check 3: a fill-complete matrix requires a fill-complete graph.
5184 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5185 (isFillComplete () && ! staticGraph_->isFillComplete (),
5186 std::logic_error, err <<
"  Specifically, the matrix is fill complete, "
5187 "but its graph is NOT fill complete.");
// Check 4: if the graph has allocated a nonzero number of index slots for a nonzero
// number of local rows, the unpacked values array must not be empty.
5190 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5191 (staticGraph_->indicesAreAllocated () &&
5192 staticGraph_->getLocalAllocationSize() > 0 &&
5193 staticGraph_->getLocalNumRows() > 0 &&
5194 valuesUnpacked_wdv.extent (0) == 0,
5195 std::logic_error, err);
// Builds a one-line, brace-delimited summary of the matrix in a std::ostringstream:
// the class tag, the object label (only when non-empty), the fill-complete flag and the
// global dimensions. The global entry count is added only in the fill-complete branch.
// NOTE(review): the function name and the return statement are not visible in this
// excerpt; presumably this is CrsMatrix::description() returning os.str() -- confirm.
5199 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5204 std::ostringstream os;
5206 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
// The label is optional; skip it when empty.
5207 if (this->getObjectLabel () !=
"") {
5208 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5210 if (isFillComplete ()) {
5211 os <<
"isFillComplete: true"
5212 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5213 << getGlobalNumCols () <<
"]"
5214 <<
", global number of entries: " << getGlobalNumEntries ()
// Not fill complete: only the dimensions are printed, then the summary is closed.
5218 os <<
"isFillComplete: false"
5219 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5220 << getGlobalNumCols () <<
"]}";
// Verbose, multi-line description of the matrix written to `out`, with the amount of
// detail selected by `verbLevel` (VERB_DEFAULT is treated as VERB_LOW).
// Visible stages, in order:
//   1. header: class tag, label, template parameter names, fill-complete flag,
//      global dimensions (plus global entry counts when fill complete);
//   2. Row / Column / Domain / Range Maps (printed as "null", "same as ..." or via the
//      Map's own describe());
//   3. per-process allocation and entry counts;
//   4. per-process table of rows, with (index, value) pairs at VERB_EXTREME.
// NOTE(review): the function name, the `out` parameter declaration and the bodies of the
// early-exit branches are not visible in this excerpt; presumably this is
// CrsMatrix::describe(Teuchos::FancyOStream&, EVerbosityLevel) const -- confirm.
5225 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5229 const Teuchos::EVerbosityLevel verbLevel)
const
5233 using Teuchos::ArrayView;
5234 using Teuchos::Comm;
5236 using Teuchos::TypeNameTraits;
5237 using Teuchos::VERB_DEFAULT;
5238 using Teuchos::VERB_NONE;
5239 using Teuchos::VERB_LOW;
5240 using Teuchos::VERB_MEDIUM;
5241 using Teuchos::VERB_HIGH;
5242 using Teuchos::VERB_EXTREME;
// Map the "default" verbosity onto VERB_LOW; every later test uses `vl`.
5244 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
// NOTE(review): the body of this branch is not visible; presumably it returns without
// printing anything -- confirm.
5246 if (vl == VERB_NONE) {
5251 Teuchos::OSTab tab0 (out);
5253 RCP<const Comm<int> > comm = this->getComm();
5254 const int myRank = comm->getRank();
5255 const int numProcs = comm->getSize();
// Column width for the row table: the loop iterates once per power of ten below the
// global row count; the width is then forced to at least 11 and padded by 2.
5257 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5260 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
5270 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5272 Teuchos::OSTab tab1 (out);
5275 if (this->getObjectLabel () !=
"") {
5276 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5279 out <<
"Template parameters:" << endl;
5280 Teuchos::OSTab tab2 (out);
5281 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5282 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5283 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5284 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
// Global entry statistics are only printed in the fill-complete branch.
5286 if (isFillComplete()) {
5287 out <<
"isFillComplete: true" << endl
5288 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5289 << getGlobalNumCols () <<
"]" << endl
5290 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5291 << endl <<
"Global max number of entries in a row: "
5292 << getGlobalMaxNumRowEntries () << endl;
5295 out <<
"isFillComplete: false" << endl
5296 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5297 << getGlobalNumCols () <<
"]" << endl;
// NOTE(review): body not visible; presumably output stops here below VERB_MEDIUM -- confirm.
5301 if (vl < VERB_MEDIUM) {
// ---- Maps. Each Map is printed as "null", as an alias of a previously printed Map,
// or through its own describe() at the same verbosity level.
5307 out << endl <<
"Row Map:" << endl;
5309 if (getRowMap ().is_null ()) {
5311 out <<
"null" << endl;
5318 getRowMap ()->describe (out, vl);
5323 out <<
"Column Map: ";
5325 if (getColMap ().is_null ()) {
5327 out <<
"null" << endl;
5329 }
else if (getColMap () == getRowMap ()) {
5331 out <<
"same as row Map" << endl;
5337 getColMap ()->describe (out, vl);
5342 out <<
"Domain Map: ";
5344 if (getDomainMap ().is_null ()) {
5346 out <<
"null" << endl;
5348 }
else if (getDomainMap () == getRowMap ()) {
5350 out <<
"same as row Map" << endl;
5352 }
else if (getDomainMap () == getColMap ()) {
5354 out <<
"same as column Map" << endl;
5360 getDomainMap ()->describe (out, vl);
5365 out <<
"Range Map: ";
5367 if (getRangeMap ().is_null ()) {
5369 out <<
"null" << endl;
5371 }
else if (getRangeMap () == getDomainMap ()) {
5373 out <<
"same as domain Map" << endl;
5375 }
else if (getRangeMap () == getRowMap ()) {
5377 out <<
"same as row Map" << endl;
5383 getRangeMap ()->describe (out, vl);
// ---- Per-process summary: loop over all ranks; only the rank whose turn it is prints.
5387 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5388 if (myRank == curRank) {
5389 out <<
"Process rank: " << curRank << endl;
5390 Teuchos::OSTab tab2 (out);
5391 if (! staticGraph_->indicesAreAllocated ()) {
5392 out <<
"Graph indices not allocated" << endl;
5395 out <<
"Number of allocated entries: "
5396 << staticGraph_->getLocalAllocationSize () << endl;
5398 out <<
"Number of entries: " << getLocalNumEntries () << endl
5399 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
// NOTE(review): body not visible; presumably output stops here below VERB_HIGH -- confirm.
5408 if (vl < VERB_HIGH) {
// ---- Per-process row table: rank, global row id, entry count, and (at VERB_EXTREME)
// the (index, value) pairs of each row.
5413 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5414 if (myRank == curRank) {
5415 out << std::setw(width) <<
"Proc Rank"
5416 << std::setw(width) <<
"Global Row"
5417 << std::setw(width) <<
"Num Entries";
5418 if (vl == VERB_EXTREME) {
5419 out << std::setw(width) <<
"(Index,Value)";
5422 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5423 const size_t nE = getNumEntriesInLocalRow(r);
5424 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5425 out << std::setw(width) << myRank
5426 << std::setw(width) << gid
5427 << std::setw(width) << nE;
5428 if (vl == VERB_EXTREME) {
// Globally indexed storage: the row view already holds global column indices.
5429 if (isGloballyIndexed()) {
5430 global_inds_host_view_type rowinds;
5431 values_host_view_type rowvals;
5432 getGlobalRowView (gid, rowinds, rowvals);
5433 for (
size_t j = 0; j < nE; ++j) {
5434 out <<
" (" << rowinds[j]
5435 <<
", " << rowvals[j]
// Locally indexed storage: local column indices are translated to global ones through
// the column Map before printing.
5439 else if (isLocallyIndexed()) {
5440 local_inds_host_view_type rowinds;
5441 values_host_view_type rowvals;
5442 getLocalRowView (r, rowinds, rowvals);
5443 for (
size_t j=0; j < nE; ++j) {
5444 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5445 <<
", " << rowvals[j]
// Returns true exactly when `srcRowMat` is non-null.
// NOTE(review): the function name and the statement that produces `srcRowMat` are not
// visible in this excerpt; presumably this is CrsMatrix::checkSizes and `srcRowMat` is
// the result of a dynamic_cast of the source object to a row-matrix type -- confirm.
5461 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5475 return (srcRowMat !=
nullptr);
// CrsMatrix::applyCrsPadding (name from the tfecfFuncName / ProfilingRegion strings).
// Applies `padding` to the unpacked column-index and value storage of the matrix.
// Visible steps:
//   1. if the graph has no allocated indices, call allocateValues(GlobalIndices, ...);
//   2. copy the graph's unpacked row pointers into a fresh row_ptr_beg view;
//   3. build row_ptr_end, either from the stored per-row entry counts or, when no counts
//      are stored, from the next row's begin offset;
//   4. invoke the padding routine on the global or local index array together with
//      valuesUnpacked_wdv (the callee's name is not visible in this excerpt);
//   5. verify that values and indices have equal length afterwards (std::logic_error);
//   6. recompute the per-row entry counts if they were stored;
//   7. install row_ptr_beg as the graph's unpacked row pointers.
// When `verbose` is set, progress messages are written to std::cerr.
5478 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5482 const typename crs_graph_type::padding_type& padding,
5488 using LO = local_ordinal_type;
5489 using row_ptrs_type =
5490 typename local_graph_device_type::row_map_type::non_const_type;
5491 using range_policy =
5492 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
// tfecfFuncName has no trailing ": ", so exception messages below start with ": ".
5493 const char tfecfFuncName[] =
"applyCrsPadding";
5494 const char suffix[] =
5495 ".  Please report this bug to the Tpetra developers.";
5496 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5498 std::unique_ptr<std::string> prefix;
5500 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5501 std::ostringstream os;
5502 os << *prefix <<
"padding: ";
5505 std::cerr << os.str();
// The rank is only looked up in verbose mode; otherwise -1 is used.
// NOTE(review): the bodies of the two null checks inside the lambda are not visible.
5507 const int myRank = ! verbose ? -1 : [&] () {
5508 auto map = this->getMap();
5509 if (map.is_null()) {
5512 auto comm = map->getComm();
5513 if (comm.is_null()) {
5516 return comm->getRank();
// Step 1: make sure storage exists before padding it.
5520 if (! myGraph_->indicesAreAllocated()) {
5522 std::ostringstream os;
5523 os << *prefix <<
"Call allocateIndices" << endl;
5524 std::cerr << os.str();
5526 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5538 std::ostringstream os;
5539 os << *prefix <<
"Allocate row_ptrs_beg: "
5540 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5541 std::cerr << os.str();
// Step 2: row_ptr_beg is allocated uninitialized and then filled by the deep_copy.
5543 using Kokkos::view_alloc;
5544 using Kokkos::WithoutInitializing;
5545 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5546 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5548 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
// N is one less than the row-pointer count, or 0 when the row-pointer array is empty.
5550 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5551 size_t(row_ptr_beg.extent(0) - 1);
5553 std::ostringstream os;
5554 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5555 std::cerr << os.str();
5557 row_ptrs_type row_ptr_end(
5558 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5560 row_ptrs_type num_row_entries_d;
// Per-row entry counts are only maintained when the graph stores them.
5562 const bool refill_num_row_entries =
5563 myGraph_->k_numRowEntries_.extent(0) != 0;
// Step 3a: counts are stored, so end = begin + count.
5565 if (refill_num_row_entries) {
5568 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5569 myGraph_->k_numRowEntries_);
5570 Kokkos::parallel_for
5571 (
"Fill end row pointers", range_policy(0, N),
5572 KOKKOS_LAMBDA (
const size_t i) {
5573 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
// Step 3b: no counts stored, so each row ends where the next one begins.
5580 Kokkos::parallel_for
5581 (
"Fill end row pointers", range_policy(0, N),
5582 KOKKOS_LAMBDA (
const size_t i) {
5583 row_ptr_end(i) = row_ptr_beg(i+1);
// Steps 4-5: pad the index array that matches the graph's current indexing mode, then
// check that values and column indices still have the same length.
5587 if (myGraph_->isGloballyIndexed()) {
5589 myGraph_->gblInds_wdv,
5590 valuesUnpacked_wdv, padding, myRank, verbose);
5591 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5592 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5593 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5594 (newValuesLen != newColIndsLen, std::logic_error,
5595 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5596 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5601 myGraph_->lclIndsUnpacked_wdv,
5602 valuesUnpacked_wdv, padding, myRank, verbose);
5603 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5604 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5605 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5606 (newValuesLen != newColIndsLen, std::logic_error,
5607 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5608 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
// Step 6: recompute the per-row counts as end - begin.
5612 if (refill_num_row_entries) {
5613 Kokkos::parallel_for
5614 (
"Fill num entries", range_policy(0, N),
5615 KOKKOS_LAMBDA (
const size_t i) {
5616 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5622 std::ostringstream os;
5623 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5624 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5625 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5626 std::cerr << os.str();
// The number of rows must not change: only the offsets do.
5627 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5628 row_ptr_beg.extent(0) );
// Step 7: publish the row pointers to the graph.
5630 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
// CrsMatrix::copyAndPermuteStaticGraph
//
// Copy-and-permute step used when the target matrix has a static graph.
//   srcMat           source row matrix.
//   numSameIDs       number of leading rows that have the same local index in source
//                    and target; they are combined row by row.
//   permuteToLIDs    target local row indices of the permuted rows.
//   permuteFromLIDs  source local row indices of the permuted rows.
//   numPermutes      number of entries in the two permute arrays.
//
// For every row, the source entries are obtained in global indices -- by copy
// (getGlobalRowCopy) when the source is locally indexed, otherwise by view
// (getGlobalRowView) -- and handed to combineGlobalValues for the target row.
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy disagree on a
// row length.
//
// Changes relative to the previous revision:
//   * exception messages now start with ": " because tfecfFuncName carries no trailing
//     separator (previously the text was fused onto the function name);
//   * the second message gained the missing verb "reports";
//   * the verbose prefix names "CrsMatrix", the class this method belongs to.
5633 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5635 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5636 copyAndPermuteStaticGraph(
5637 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5638 const size_t numSameIDs,
5639 const LocalOrdinal permuteToLIDs[],
5640 const LocalOrdinal permuteFromLIDs[],
5641 const size_t numPermutes)
5643 using Details::ProfilingRegion;
5644 using Teuchos::Array;
5645 using Teuchos::ArrayView;
5647 using LO = LocalOrdinal;
5648 using GO = GlobalOrdinal;
// No trailing ": " here because the same string is also passed to createPrefix.
5649 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5650 const char suffix[] =
5651 "  Please report this bug to the Tpetra developers.";
5652 ProfilingRegion regionCAP
5653 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5657 std::unique_ptr<std::string> prefix;
5659 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
5660 std::ostringstream os;
5661 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for callees; null when not verbose.
5663 const char*
const prefix_raw =
5664 verbose ? prefix.get()->c_str() :
nullptr;
5666 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
//
// Part 1: rows with identical local index in source and target.
//
5671 const map_type& srcRowMap = * (srcMat.getRowMap ());
// Scratch buffers, reused across rows and only ever grown.
5672 nonconst_global_inds_host_view_type rowInds;
5673 nonconst_values_host_view_type rowVals;
5674 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5675 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5679 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5680 const GO targetGID = sourceGID;
5682 ArrayView<const GO>rowIndsConstView;
5683 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: global indices are not stored, so copy the row out.
5685 if (sourceIsLocallyIndexed) {
5686 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5687 if (rowLength > static_cast<size_t> (rowInds.size())) {
5688 Kokkos::resize(rowInds,rowLength);
5689 Kokkos::resize(rowVals,rowLength);
// Restrict the scratch buffers to exactly rowLength entries.
5693 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5694 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5699 size_t checkRowLength = 0;
5700 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5701 rowValsView, checkRowLength);
5703 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5704 (rowLength != checkRowLength, std::logic_error,
": For "
5705 "global row index " << sourceGID <<
", the source "
5706 "matrix's getNumEntriesInGlobalRow returns a row length "
5707 "of " << rowLength <<
", but getGlobalRowCopy reports "
5708 "a row length of " << checkRowLength <<
"." << suffix);
// Wrap the Kokkos views as Teuchos ArrayViews for combineGlobalValues.
5715 rowIndsConstView = Teuchos::ArrayView<const GO> (
5716 rowIndsView.data(), rowIndsView.extent(0),
5717 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5718 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5719 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5720 Teuchos::RCP_DISABLE_NODE_LOOKUP);
// Globally indexed source: a view of the row is sufficient.
5725 global_inds_host_view_type rowIndsView;
5726 values_host_view_type rowValsView;
5727 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5732 rowIndsConstView = Teuchos::ArrayView<const GO> (
5733 rowIndsView.data(), rowIndsView.extent(0),
5734 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5735 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5736 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5737 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5745 combineGlobalValues(targetGID, rowIndsConstView,
5747 prefix_raw, debug, verbose);
5751 std::ostringstream os;
5752 os << *prefix <<
"Do permutes" << endl;
//
// Part 2: permuted rows; source and target rows differ.
//
5755 const map_type& tgtRowMap = * (this->getRowMap ());
5756 for (
size_t p = 0; p < numPermutes; ++p) {
5757 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5758 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5760 ArrayView<const GO> rowIndsConstView;
5761 ArrayView<const Scalar> rowValsConstView;
5763 if (sourceIsLocallyIndexed) {
5764 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5765 if (rowLength > static_cast<size_t> (rowInds.size ())) {
5766 Kokkos::resize(rowInds,rowLength);
5767 Kokkos::resize(rowVals,rowLength);
5771 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5772 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5777 size_t checkRowLength = 0;
5778 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5779 rowValsView, checkRowLength);
5781 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5782 (rowLength != checkRowLength, std::logic_error,
": For "
5783 "source matrix global row index " << sourceGID <<
", "
5784 "getNumEntriesInGlobalRow returns a row length of " <<
5785 rowLength <<
", but getGlobalRowCopy reports a row length of "
5786 << checkRowLength <<
"." << suffix);
5793 rowIndsConstView = Teuchos::ArrayView<const GO> (
5794 rowIndsView.data(), rowIndsView.extent(0),
5795 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5796 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5797 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5798 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5803 global_inds_host_view_type rowIndsView;
5804 values_host_view_type rowValsView;
5805 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5810 rowIndsConstView = Teuchos::ArrayView<const GO> (
5811 rowIndsView.data(), rowIndsView.extent(0),
5812 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5813 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5814 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5815 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5820 combineGlobalValues(targetGID, rowIndsConstView,
5822 prefix_raw, debug, verbose);
5826 std::ostringstream os;
5827 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermuteNonStaticGraph
//
// Copy-and-permute step used when the target matrix owns a modifiable graph.
//   srcMat              source row matrix.
//   numSameIDs          number of leading rows with identical local index in source
//                       and target.
//   permuteToLIDs_dv    target local row indices of the permuted rows (host view is read).
//   permuteFromLIDs_dv  source local row indices of the permuted rows (host view is read).
//   numPermutes         number of permuted rows.
//
// The graph first computes the padding needed for the incoming rows
// (computeCrsPadding) and applyCrsPadding applies it; each source row is then fetched in
// global indices (by copy for a locally indexed source, by view otherwise) and inserted
// with insertGlobalValuesFilteredChecked.
// Throws std::logic_error if getNumEntriesInGlobalRow and getGlobalRowCopy disagree on a
// row length.
//
// Changes relative to the previous revision:
//   * the second exception message now starts with ": " like the first one, because
//     tfecfFuncName carries no trailing separator;
//   * the second message gained the missing verb "reports";
//   * the verbose prefix names "CrsMatrix", the class this method belongs to.
5831 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5833 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5834 copyAndPermuteNonStaticGraph(
5835 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5836 const size_t numSameIDs,
5837 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5838 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5839 const size_t numPermutes)
5841 using Details::ProfilingRegion;
5842 using Teuchos::Array;
5843 using Teuchos::ArrayView;
5845 using LO = LocalOrdinal;
5846 using GO = GlobalOrdinal;
// No trailing ": " here because the same string is also passed to createPrefix.
5847 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5848 const char suffix[] =
5849 "  Please report this bug to the Tpetra developers.";
5850 ProfilingRegion regionCAP
5851 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5855 std::unique_ptr<std::string> prefix;
5857 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
5858 std::ostringstream os;
5859 os << *prefix <<
"Start" << endl;
// Raw C string of the prefix for callees; null when not verbose.
5861 const char*
const prefix_raw =
5862 verbose ? prefix.get()->c_str() :
nullptr;
// Make room for the incoming entries before inserting anything.
5865 using row_graph_type = RowGraph<LO, GO, Node>;
5866 const row_graph_type& srcGraph = *(srcMat.getGraph());
5868 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5869 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5870 applyCrsPadding(*padding, verbose);
5872 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
//
// Part 1: rows with identical local index in source and target.
//
5877 const map_type& srcRowMap = * (srcMat.getRowMap ());
5878 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5879 using gids_type = nonconst_global_inds_host_view_type;
5880 using vals_type = nonconst_values_host_view_type;
5883 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5887 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5888 const GO targetGID = sourceGID;
5890 ArrayView<const GO> rowIndsConstView;
5891 ArrayView<const Scalar> rowValsConstView;
// Locally indexed source: global indices are not stored, so copy the row out.
5893 if (sourceIsLocallyIndexed) {
5895 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
// Scratch buffers are reused across rows and only ever grown.
5896 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5897 Kokkos::resize(rowInds,rowLength);
5898 Kokkos::resize(rowVals,rowLength);
5902 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5903 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5908 size_t checkRowLength = 0;
5909 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5912 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5913 (rowLength != checkRowLength, std::logic_error,
": For "
5914 "global row index " << sourceGID <<
", the source "
5915 "matrix's getNumEntriesInGlobalRow returns a row length "
5916 "of " << rowLength <<
", but getGlobalRowCopy reports "
5917 "a row length of " << checkRowLength <<
"." << suffix);
5919 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5920 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
// Globally indexed source: a view of the row is sufficient.
5923 global_inds_host_view_type rowIndsView;
5924 values_host_view_type rowValsView;
5925 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5931 rowIndsConstView = Teuchos::ArrayView<const GO> (
5932 rowIndsView.data(), rowIndsView.extent(0),
5933 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5934 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5935 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5936 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5942 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5943 rowValsConstView, prefix_raw, debug, verbose);
5947 std::ostringstream os;
5948 os << *prefix <<
"Do permutes" << endl;
//
// Part 2: permuted rows, read through the host views of the DualViews.
//
5950 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5951 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5953 const map_type& tgtRowMap = * (this->getRowMap ());
5954 for (
size_t p = 0; p < numPermutes; ++p) {
5955 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5956 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5958 ArrayView<const GO> rowIndsConstView;
5959 ArrayView<const Scalar> rowValsConstView;
5961 if (sourceIsLocallyIndexed) {
5962 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5963 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5964 Kokkos::resize(rowInds,rowLength);
5965 Kokkos::resize(rowVals,rowLength);
5969 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5970 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5975 size_t checkRowLength = 0;
5976 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5977 rowValsView, checkRowLength);
5979 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5980 (rowLength != checkRowLength, std::logic_error,
": For "
5981 "source matrix global row index " << sourceGID <<
", "
5982 "getNumEntriesInGlobalRow returns a row length of " <<
5983 rowLength <<
", but getGlobalRowCopy reports a row length of "
5984 << checkRowLength <<
"." << suffix);
5986 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5987 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
5990 global_inds_host_view_type rowIndsView;
5991 values_host_view_type rowValsView;
5992 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5998 rowIndsConstView = Teuchos::ArrayView<const GO> (
5999 rowIndsView.data(), rowIndsView.extent(0),
6000 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6001 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6002 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6003 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6009 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6010 rowValsConstView, prefix_raw, debug, verbose);
6014 std::ostringstream os;
6015 os << *prefix <<
"Done" << endl;
// CrsMatrix::copyAndPermute (name from the tfecfFuncName / ProfilingRegion strings).
//
// Entry point of the copy-and-permute phase of an Import/Export.
//   numSameIDs       number of leading rows with identical local index in source and
//                    target.
//   permuteToLIDs    target local row indices of the permuted rows.
//   permuteFromLIDs  source local row indices of the permuted rows.
//
// Validates that both permute lists have the same length (std::invalid_argument
// otherwise), casts the source object to the row-matrix type, and dispatches to
// copyAndPermuteStaticGraph or copyAndPermuteNonStaticGraph depending on
// isStaticGraph(). In verbose mode, progress is written to std::cerr.
//
// Change relative to the previous revision: the length-mismatch message now has a space
// before "!=" so the first number no longer runs into the operator.
6019 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6024 const size_t numSameIDs,
6025 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6026 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6035 const char tfecfFuncName[] =
"copyAndPermute: ";
6036 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6038 const bool verbose = Behavior::verbose(
"CrsMatrix");
6039 std::unique_ptr<std::string> prefix;
6041 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6042 std::ostringstream os;
6043 os << *prefix << endl
6044 << *prefix <<
"  numSameIDs: " << numSameIDs << endl
6045 << *prefix <<
"  numPermute: " << permuteToLIDs.extent(0)
6054 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6056 std::cerr << os.str ();
// Both permute lists describe the same rows and must therefore be equally long.
6059 const auto numPermute = permuteToLIDs.extent (0);
6060 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6061 (numPermute != permuteFromLIDs.extent (0),
6062 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6063 << numPermute <<
" != permuteFromLIDs.extent(0) = "
6064 << permuteFromLIDs.extent (0) <<
".");
// Reference dynamic_cast: throws std::bad_cast if srcObj is not of type RMT.
6069 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
// Static graph: the helper takes raw host pointers, so the host copies must be current.
6070 if (isStaticGraph ()) {
6071 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6072 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6073 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6074 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6076 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6077 permuteToLIDs_h.data(),
6078 permuteFromLIDs_h.data(),
// Non-static graph: the helper receives the DualViews themselves.
6082 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6083 permuteFromLIDs, numPermute);
6087 std::ostringstream os;
6088 os << *prefix <<
"Done" << endl;
6089 std::cerr << os.str();
// CrsMatrix::packAndPrepare (name from the tfecfFuncName / ProfilingRegion strings).
//
// Packs the rows listed in `exportLIDs` of the source object into `exports` for
// communication.
//   exportLIDs          local indices of the rows to pack.
//   exports             output byte buffer; reallocated when too small on the generic path.
//   numPacketsPerLID    output, number of packets for each exported row.
//   constantNumPackets  output, forwarded to the packing routine.
//
// Two paths:
//   * source is a CrsMatrix of the same type -> srcCrsMat->packNew(...);
//   * otherwise the source must be a RowMatrix (std::invalid_argument if not) and its
//     generic pack() is used, with the result copied into `exports`.
// Exceptions thrown while packing are caught and recorded in `msg`; the error state is
// then combined across processes with a REDUCE_MAX reduction and reported as
// std::logic_error.
//
// Change relative to the previous revision: `myRank` is now initialised from
// comm.getRank(). It was initialised from comm.getSize(), so every "Proc N:" error
// message carried the communicator size instead of the calling process's rank.
6093 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6098 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6099 Kokkos::DualView<char*, buffer_device_type>& exports,
6100 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6101 size_t& constantNumPackets)
6106 using Teuchos::outArg;
6107 using Teuchos::REDUCE_MAX;
6108 using Teuchos::reduceAll;
6110 typedef LocalOrdinal LO;
6111 typedef GlobalOrdinal GO;
6112 const char tfecfFuncName[] =
"packAndPrepare: ";
6113 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6115 const bool debug = Behavior::debug(
"CrsMatrix");
6116 const bool verbose = Behavior::verbose(
"CrsMatrix");
// NOTE(review): the body of the null-communicator branch is not visible in this excerpt.
6119 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6120 if (pComm.is_null ()) {
6123 const Teuchos::Comm<int>& comm = *pComm;
// Rank of the calling process; used only to label error messages below.
6124 const int myRank = comm.getRank ();
6126 std::unique_ptr<std::string> prefix;
6128 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6129 std::ostringstream os;
6130 os << *prefix <<
"Start" << endl
6140 std::cerr << os.str ();
// Collects this process's error text.
6163 std::ostringstream msg;
// Fast path: the source is a CrsMatrix with the same template parameters.
6166 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6167 const crs_matrix_type* srcCrsMat =
6168 dynamic_cast<const crs_matrix_type*
> (&source);
6169 if (srcCrsMat !=
nullptr) {
6171 std::ostringstream os;
6172 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6173 "calling packNew" << endl;
6174 std::cerr << os.str ();
6177 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6178 constantNumPackets);
6180 catch (std::exception& e) {
6182 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Generic path: any RowMatrix, packed through the Teuchos array interface.
6186 using Kokkos::HostSpace;
6187 using Kokkos::subview;
6188 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6189 using range_type = Kokkos::pair<size_t, size_t>;
6192 std::ostringstream os;
6193 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6195 std::cerr << os.str ();
6198 const row_matrix_type* srcRowMat =
6199 dynamic_cast<const row_matrix_type*
> (&source);
6200 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6201 (srcRowMat ==
nullptr, std::invalid_argument,
6202 "The source object of the Import or Export operation is neither a "
6203 "CrsMatrix (with the same template parameters as the target object), "
6204 "nor a RowMatrix (with the same first four template parameters as the "
// pack() reads the export list on the host, so the host copy must be current.
6215 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6216 auto exportLIDs_h = exportLIDs.view_host ();
6217 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6218 exportLIDs_h.size ());
// pack() fills a Teuchos::Array; its contents are moved into `exports` afterwards.
6222 Teuchos::Array<char> exports_a;
// numPacketsPerLID is output-only here: reset its sync state and mark the host side
// as modified before writing through the host view.
6228 numPacketsPerLID.clear_sync_state ();
6229 numPacketsPerLID.modify_host ();
6230 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6231 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6232 numPacketsPerLID_h.size ());
6237 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6238 constantNumPackets);
6240 catch (std::exception& e) {
6242 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
// Grow `exports` only when it is too small, keeping its label (or "exports" if unlabeled).
6246 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6247 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6248 const std::string oldLabel = exports.view_device().label ();
6249 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6250 exports = exports_type (newLabel, newAllocSize);
6255 exports.modify_host();
6257 auto exports_h = exports.view_host ();
6258 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
// Wrap the Teuchos::Array storage as a host Kokkos::View over the packed bytes.
6262 typedef typename exports_type::t_host::execution_space HES;
6263 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6264 Kokkos::View<const char*, host_device_type>
6265 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
// Combine the local error flags across processes with a max reduction.
6272 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6275 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6276 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6277 "one or more participating processes.");
6281 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6282 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6283 "or more participating processes.  Here is this process' error "
6284 "message: " << msg.str ());
6288 std::ostringstream os;
6289 os << *prefix <<
"packAndPrepare: Done!" << endl
6299 std::cerr << os.str ();
// CrsMatrix::packRow
//
// Packs one matrix row into the byte buffer `exports`, starting at byte `offset`.
//   exports           destination byte buffer.
//   offset            byte offset in `exports` where this row's data begins.
//   numEnt            number of entries in the row.
//   gidsIn            global column indices of the row (numEnt of them).
//   valsIn            values of the row (numEnt of them).
//   numBytesPerValue  packed size of one value, in bytes.
//
// Layout, in order: entry-count field, then the global indices, then the values.
// Throws std::logic_error if the number of bytes written differs from the expected
// total, and std::runtime_error if PackTraits::packArray reports an error.
// NOTE(review): the return type/statement and the declarations of `gid`, `numEntLen`
// and `errorCode` are not visible in this excerpt.
//
// Change relative to the previous revision: numEntLO is produced with
// static_cast<LO>. The old static_cast<size_t> cast to the type numEnt already had and
// then narrowed to LO implicitly, hiding the conversion that actually takes place.
6303 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6305 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6306 packRow (
char exports[],
6307 const size_t offset,
6308 const size_t numEnt,
6309 const GlobalOrdinal gidsIn[],
6310 const impl_scalar_type valsIn[],
6311 const size_t numBytesPerValue)
const
6314 using Kokkos::subview;
6316 typedef LocalOrdinal LO;
6317 typedef GlobalOrdinal GO;
6318 typedef impl_scalar_type ST;
// The entry count is stored in the buffer as an LO.
6326 const LO numEntLO =
static_cast<LO> (numEnt);
// Byte offsets and lengths of the three consecutive fields of the packed row.
6328 const size_t numEntBeg = offset;
6330 const size_t gidsBeg = numEntBeg + numEntLen;
6331 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6332 const size_t valsBeg = gidsBeg + gidsLen;
6333 const size_t valsLen = numEnt * numBytesPerValue;
6335 char*
const numEntOut = exports + numEntBeg;
6336 char*
const gidsOut = exports + gidsBeg;
6337 char*
const valsOut = exports + valsBeg;
// Running total of bytes written, checked against the expected size below.
6339 size_t numBytesOut = 0;
// packArray returns (error code, bytes written); both are accumulated.
6344 Kokkos::pair<int, size_t> p;
6345 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6346 errorCode += p.first;
6347 numBytesOut += p.second;
6349 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6350 errorCode += p.first;
6351 numBytesOut += p.second;
6354 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6355 TEUCHOS_TEST_FOR_EXCEPTION
6356 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6357 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6358 << expectedNumBytes <<
".");
6359 TEUCHOS_TEST_FOR_EXCEPTION
6360 (errorCode != 0, std::runtime_error,
"packRow: "
6361 "PackTraits::packArray returned a nonzero error code");
// Unpack one packed row from the imports buffer into gidsOut and valsOut.
//
// The packed fields are located from offset in this order: the entry
// count (numEntBeg), then the global indices (gidsBeg), then the values
// (valsBeg).
//
// Throws std::logic_error when the byte count and the entry count
// disagree, when the count read from the buffer is zero or differs from
// numEnt, or when the number of bytes consumed does not match numBytes
// or the expected total.  Throws std::runtime_error when
// PackTraits::unpackArray reports a nonzero error code.
6366 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6368 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6369 unpackRow (GlobalOrdinal gidsOut[],
6370 impl_scalar_type valsOut[],
6371 const char imports[],
6372 const size_t offset,
6373 const size_t numBytes,
6374 const size_t numEnt,
6375 const size_t numBytesPerValue)
6378 using Kokkos::subview;
6380 typedef LocalOrdinal LO;
6381 typedef GlobalOrdinal GO;
6382 typedef impl_scalar_type ST;
6384 Details::ProfilingRegion region_upack_row(
6385 "Tpetra::CrsMatrix::unpackRow",
// Zero bytes to unpack: the message below reports the case where
// numEnt is nevertheless nonzero.
6389 if (numBytes == 0) {
6392 const int myRank = this->getMap ()->getComm ()->getRank ();
6393 TEUCHOS_TEST_FOR_EXCEPTION
6394 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6395 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6396 "number of entries to unpack (as reported by numPacketsPerLID) "
6397 "for this row numEnt=" << numEnt <<
" != 0.");
// The mirror-image inconsistency: no entries, yet bytes to unpack.
6402 if (numEnt == 0 && numBytes != 0) {
6403 const int myRank = this->getMap ()->getComm ()->getRank ();
6404 TEUCHOS_TEST_FOR_EXCEPTION
6405 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6406 "unpackRow: The number of entries to unpack (as reported by "
6407 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6408 "numBytes=" << numBytes <<
" != 0.");
// Byte offsets and lengths of the three packed fields within imports.
6414 const size_t numEntBeg = offset;
6416 const size_t gidsBeg = numEntBeg + numEntLen;
6417 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6418 const size_t valsBeg = gidsBeg + gidsLen;
6419 const size_t valsLen = numEnt * numBytesPerValue;
6421 const char*
const numEntIn = imports + numEntBeg;
6422 const char*
const gidsIn = imports + gidsBeg;
6423 const char*
const valsIn = imports + valsBeg;
// Running total of bytes consumed; compared against numBytes at the end.
6425 size_t numBytesOut = 0;
// The entry count read from the buffer must match numEnt and be nonzero.
// Both failures are described in a single message when both occur.
6429 if (static_cast<size_t> (numEntOut) != numEnt ||
6430 numEntOut == static_cast<LO> (0)) {
6431 const int myRank = this->getMap ()->getComm ()->getRank ();
6432 std::ostringstream os;
6433 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6434 bool firstErrorCondition =
false;
6435 if (static_cast<size_t> (numEntOut) != numEnt) {
6436 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6437 <<
" does not equal number of entries unpacked from imports "
6438 "buffer numEntOut=" << numEntOut <<
".";
6439 firstErrorCondition =
true;
6441 if (numEntOut == static_cast<LO> (0)) {
6442 if (firstErrorCondition) {
6445 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6446 "but number of bytes to unpack for this row numBytes=" << numBytes
6447 <<
" != 0. This should never happen, since packRow should only "
6448 "ever pack rows with a nonzero number of entries. In this case, "
6449 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6452 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
// Unpack the global indices, then the values.  Each call returns an
// (error code, byte count) pair; both are accumulated.
6456 Kokkos::pair<int, size_t> p;
6457 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6458 errorCode += p.first;
6459 numBytesOut += p.second;
6461 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6462 errorCode += p.first;
6463 numBytesOut += p.second;
// Consistency checks: bytes consumed against the caller's numBytes and
// against the total computed from the field lengths.
6466 TEUCHOS_TEST_FOR_EXCEPTION
6467 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6468 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6470 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6471 TEUCHOS_TEST_FOR_EXCEPTION
6472 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6473 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6474 << expectedNumBytes <<
".");
// The accumulated error code is checked last, after the size checks.
6476 TEUCHOS_TEST_FOR_EXCEPTION
6477 (errorCode != 0, std::runtime_error,
"unpackRow: "
6478 "PackTraits::unpackArray returned a nonzero error code");
// Size the exports buffer for packing the rows listed in exportLIDs.
//
// Sets totalNumEntries to the sum of getNumEntriesInLocalRow over the
// host view of exportLIDs, and replaces exports with a larger DualView
// when its current extent is smaller than the computed allocSize.
// When verbose output is enabled, "Before:" and "After:" messages are
// written to std::cerr.
6483 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6485 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6486 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6487 size_t& totalNumEntries,
6488 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6490 using Details::Behavior;
6493 typedef impl_scalar_type IST;
6494 typedef LocalOrdinal LO;
6495 typedef GlobalOrdinal GO;
6501 const bool verbose = Behavior::verbose(
"CrsMatrix");
6502 std::unique_ptr<std::string> prefix;
6504 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6505 std::ostringstream os;
6506 os << *prefix <<
"Before:"
6514 std::cerr << os.str ();
6519 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
// The host view is read directly below, so it must not be out of date.
6521 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6522 auto exportLIDs_h = exportLIDs.view_host ();
// Count the entries in every row that is to be exported.
6525 totalNumEntries = 0;
6526 for (LO i = 0; i < numExportLIDs; ++i) {
6527 const LO lclRow = exportLIDs_h[i];
6528 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
// The invalid() sentinel from getNumEntriesInLocalRow is special-cased
// before the count is added to the total.
6531 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6534 totalNumEntries += curNumEntries;
// Bytes needed: one LO per exported row, plus one IST and one GO per
// matrix entry.
6545 const size_t allocSize =
6546 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6547 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
// Only reallocate when the buffer is too small.
6548 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
6549 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
// Keep the existing label if there is one; otherwise use "exports".
6551 const std::string oldLabel = exports.view_device().label ();
6552 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6553 exports = exports_type (newLabel, allocSize);
6557 std::ostringstream os;
6558 os << *prefix <<
"After:"
6566 std::cerr << os.str ();
// Pack the rows named by exportLIDs into exports.
//
// The packing routine is chosen from isStaticGraph().  The visible
// alternative to the static-graph call forwards the same four arguments
// to packNonStaticNew.
6570 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6573 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6574 Kokkos::DualView<char*, buffer_device_type>& exports,
6575 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6576 size_t& constantNumPackets)
const
6580 if (this->isStaticGraph ()) {
6583 constantNumPackets);
6586 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6587 constantNumPackets);
// Pack the rows named by exportLIDs into exports, working on host views.
//
// For each exported row, packRow writes into the host view of exports
// and the number of bytes it returns is stored in numPacketsPerLID.
// constantNumPackets is set to 0.
//
// Throws std::invalid_argument when exportLIDs and numPacketsPerLID have
// different extents, and std::logic_error when a row's offset or
// offset + numBytes falls outside the exports buffer.
6591 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6594 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6595 Kokkos::DualView<char*, buffer_device_type>& exports,
6596 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6597 size_t& constantNumPackets)
const
6605 using LO = LocalOrdinal;
6606 using GO = GlobalOrdinal;
6607 using ST = impl_scalar_type;
6608 const char tfecfFuncName[] =
"packNonStaticNew: ";
6610 const bool verbose = Behavior::verbose(
"CrsMatrix");
6611 std::unique_ptr<std::string> prefix;
6613 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6614 std::ostringstream os;
6615 os << *prefix <<
"Start" << endl;
6616 std::cerr << os.str ();
6619 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6620 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6621 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
6622 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6623 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
// NOTE(review): presumably 0 tells the caller that the packet count
// varies per row -- confirm against the caller's contract.
6629 constantNumPackets = 0;
// Size (and if necessary reallocate) exports before writing into it.
6634 size_t totalNumEntries = 0;
6635 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6636 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
// exports is written on host below, so reset its sync state and mark
// the host side as modified.
6639 exports.clear_sync_state();
6640 exports.modify_host();
6641 auto exports_h = exports.view_host ();
6643 std::ostringstream os;
6644 os << *prefix <<
"After marking exports as modified on host, "
6646 std::cerr << os.str ();
6650 auto exportLIDs_h = exportLIDs.view_host ();
// numPacketsPerLID arrives as a const reference but is an output here:
// const is cast away so that its sync state can be updated.
6653 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6654 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6655 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6660 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
// Scratch space for global column indices, set up only when the matrix
// is locally indexed (the indices must then be converted per row).
6664 typename global_inds_host_view_type::non_const_type gidsIn_k;
6665 if (this->isLocallyIndexed()) {
6667 typename global_inds_host_view_type::non_const_type(
"packGids",
6672 for (
size_t i = 0; i < numExportLIDs; ++i) {
6673 const LO lclRow = exportLIDs_h[i];
6675 size_t numBytes = 0;
6676 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6683 numPacketsPerLID_h[i] = 0;
// Locally indexed: translate each local column index to a global one
// through the column Map, then pack the first numEnt scratch entries.
6687 if (this->isLocallyIndexed ()) {
6688 typename global_inds_host_view_type::non_const_type gidsIn;
6689 values_host_view_type valsIn;
6693 local_inds_host_view_type lidsIn;
6694 this->getLocalRowView (lclRow, lidsIn, valsIn);
6695 const map_type& colMap = * (this->getColMap ());
6696 for (
size_t k = 0; k < numEnt; ++k) {
6697 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
// NOTE(review): the subview bounds are built as GO although numEnt is a
// size_t -- confirm the conversion is intended.
6699 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
// The per-value byte count is taken from the row's first value.
6701 const size_t numBytesPerValue =
6702 PackTraits<ST>::packValueCount (valsIn[0]);
6703 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6704 gidsIn.data (), valsIn.data (),
// Globally indexed: the row view already holds global indices.
6707 else if (this->isGloballyIndexed ()) {
6708 global_inds_host_view_type gidsIn;
6709 values_host_view_type valsIn;
6715 const map_type& rowMap = * (this->getRowMap ());
6716 const GO gblRow = rowMap.getGlobalElement (lclRow);
6717 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6719 const size_t numBytesPerValue =
6720 PackTraits<ST>::packValueCount (valsIn[0]);
6721 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6722 gidsIn.data (), valsIn.data (),
// Guard against this row's bytes running past the end of the buffer.
6729 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6730 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6731 "First invalid offset into 'exports' pack buffer at index i = " << i
6732 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6733 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
// The packet count recorded for this row is its size in bytes.
6738 numPacketsPerLID_h[i] = numBytes;
6743 std::ostringstream os;
6744 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6751 std::cerr << os.str ();
// Raw-pointer front end for combineGlobalValues.
//
// Converts lclRow to a global row index through myGraph_->rowMap_, wraps
// cols and vals in Teuchos::ArrayView objects of length numEnt, and
// forwards everything to combineGlobalValues.
6755 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6757 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6758 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6759 const LocalOrdinal numEnt,
6760 const impl_scalar_type vals[],
6761 const GlobalOrdinal cols[],
6763 const char*
const prefix,
6767 using GO = GlobalOrdinal;
6771 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
// For an empty row the views are built from nullptr instead of the
// caller's pointers.
6772 Teuchos::ArrayView<const GO> cols_av
6773 (numEnt == 0 ?
nullptr : cols, numEnt);
// The values are reinterpreted from impl_scalar_type to Scalar.
6774 Teuchos::ArrayView<const Scalar> vals_av
6775 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
6780 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6781 prefix, debug, verbose);
// Combine incoming (columnIndices, values) into row globalRowIndex
// according to combineMode.
//
// Static graph: ADD calls sumIntoGlobalValues, REPLACE calls
// replaceGlobalValues, ABSMAX calls transformGlobalValues with AbsMax,
// and INSERT raises std::invalid_argument.
// Otherwise: ADD and INSERT call insertGlobalValuesFilteredChecked, while
// ABSMAX and REPLACE raise std::logic_error.
// Any other mode raises std::logic_error.
6785 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6787 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6788 combineGlobalValues(
6789 const GlobalOrdinal globalRowIndex,
6790 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6791 const Teuchos::ArrayView<const Scalar>& values,
6793 const char*
const prefix,
6797 const char tfecfFuncName[] =
"combineGlobalValues: ";
6799 if (isStaticGraph ()) {
6803 if (combineMode ==
ADD) {
6804 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6806 else if (combineMode ==
REPLACE) {
6807 replaceGlobalValues (globalRowIndex, columnIndices, values);
6809 else if (combineMode ==
ABSMAX) {
6810 using ::Tpetra::Details::AbsMax;
6812 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
// NOTE(review): the tested condition appears to repeat what the
// enclosing branches already established, which would make this throw
// unconditional here -- confirm.
6816 else if (combineMode ==
INSERT) {
6817 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6818 (isStaticGraph() && combineMode ==
INSERT,
6819 std::invalid_argument,
"INSERT combine mode is forbidden "
6820 "if the matrix has a static (const) graph (i.e., was "
6821 "constructed with the CrsMatrix constructor that takes a "
6822 "const CrsGraph pointer).");
6825 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6826 (
true, std::logic_error,
"Invalid combine mode; should "
6828 "Please report this bug to the Tpetra developers.");
// ADD and INSERT share one code path here.
6832 if (combineMode ==
ADD || combineMode ==
INSERT) {
6839 insertGlobalValuesFilteredChecked(globalRowIndex,
6840 columnIndices, values, prefix, debug, verbose);
// ABSMAX and REPLACE are reported as not yet available on this path.
6851 else if (combineMode ==
ABSMAX) {
6852 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6853 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6854 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6857 else if (combineMode ==
REPLACE) {
6858 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6859 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6860 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6864 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6865 true, std::logic_error,
"Should never get here! Please report this "
6866 "bug to the Tpetra developers.");
// Validate the arguments, then unpack and combine received rows by
// calling unpackAndCombineImpl.
//
// Throws std::invalid_argument when combineMode is not found in the list
// of valid modes, or when importLIDs and numPacketsPerLID have different
// extents.  When the guarded call to unpackAndCombineImpl throws, the
// failure is shared across processes and reported as std::logic_error.
6871 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6875 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6876 Kokkos::DualView<char*, buffer_device_type> imports,
6877 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6878 const size_t constantNumPackets,
6885 const char tfecfFuncName[] =
"unpackAndCombine: ";
6886 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6888 const bool debug = Behavior::debug(
"CrsMatrix");
6889 const bool verbose = Behavior::verbose(
"CrsMatrix");
// Names of the accepted combine modes, used only in the error message
// built below.
6890 constexpr
int numValidModes = 5;
6893 const char* validModeNames[numValidModes] =
6894 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6896 std::unique_ptr<std::string> prefix;
6898 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6899 std::ostringstream os;
6900 os << *prefix <<
"Start:" << endl
6910 << *prefix <<
" constantNumPackets: " << constantNumPackets
6914 std::cerr << os.str ();
// Reject any combine mode that is not in validModes.
6918 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6919 validModes+numValidModes) {
6920 std::ostringstream os;
6921 os <<
"Invalid combine mode. Valid modes are {";
6922 for (
int k = 0; k < numValidModes; ++k) {
6923 os << validModeNames[k];
6924 if (k < numValidModes - 1) {
6929 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6930 (
true, std::invalid_argument, os.str ());
// One packet count is required per import LID.
6932 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6933 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6934 std::invalid_argument,
"importLIDs.extent(0)="
6935 << importLIDs.extent(0)
6936 <<
" != numPacketsPerLID.extent(0)="
6937 << numPacketsPerLID.extent(0) <<
".");
6940 if (combineMode ==
ZERO) {
// Guarded call: a local exception is caught, the failure flag is
// max-reduced over the communicator, and the per-process messages are
// gathered into a single std::logic_error.
6945 using Teuchos::reduceAll;
6946 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6949 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6950 constantNumPackets, combineMode,
6952 }
catch (std::exception& e) {
6957 const Teuchos::Comm<int>& comm = * (this->getComm ());
6958 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6959 lclBad, Teuchos::outArg (gblBad));
6965 std::ostringstream os;
6966 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6967 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6968 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
6969 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6970 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6971 "threw an exception on one or more participating processes: "
6972 << endl << msg->str ());
// Second call site for the same implementation routine.
6976 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6977 constantNumPackets, combineMode,
6982 std::ostringstream os;
6983 os << *prefix <<
"Done!" << endl
6993 std::cerr << os.str ();
6997 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7001 const Kokkos::DualView<
const local_ordinal_type*,
7002 buffer_device_type>& importLIDs,
7003 Kokkos::DualView<char*, buffer_device_type> imports,
7004 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7005 const size_t constantNumPackets,
7010 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7014 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7015 std::unique_ptr<std::string> prefix;
7017 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7018 std::ostringstream os;
7019 os << *prefix <<
"isStaticGraph(): "
7020 << (isStaticGraph() ?
"true" :
"false")
7021 <<
", importLIDs.extent(0): "
7022 << importLIDs.extent(0)
7023 <<
", imports.extent(0): "
7024 << imports.extent(0)
7025 <<
", numPacketsPerLID.extent(0): "
7026 << numPacketsPerLID.extent(0)
7028 std::cerr << os.str();
7031 if (isStaticGraph ()) {
7032 using Details::unpackCrsMatrixAndCombineNew;
7033 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7034 importLIDs, constantNumPackets,
7039 using padding_type =
typename crs_graph_type::padding_type;
7040 std::unique_ptr<padding_type> padding;
7042 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7043 importLIDs, imports, numPacketsPerLID, verbose);
7045 catch (std::exception& e) {
7046 const auto rowMap = getRowMap();
7047 const auto comm = rowMap.is_null() ? Teuchos::null :
7049 const int myRank = comm.is_null() ? -1 : comm->getRank();
7050 TEUCHOS_TEST_FOR_EXCEPTION
7051 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7052 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7053 "threw an exception: " << e.what());
7056 std::ostringstream os;
7057 os << *prefix <<
"Call applyCrsPadding" << endl;
7058 std::cerr << os.str();
7060 applyCrsPadding(*padding, verbose);
7063 std::ostringstream os;
7064 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7065 std::cerr << os.str();
7067 unpackAndCombineImplNonStatic(importLIDs, imports,
7074 std::ostringstream os;
7075 os << *prefix <<
"Done" << endl;
7076 std::cerr << os.str();
// Unpack received rows on host and combine them into the matrix one row
// at a time.
//
// Two loops run over the import LIDs.  The first checks each row's
// packed size against the imports buffer and records the largest entry
// count (maxRowNumEnt).  The second unpacks each row with unpackRow into
// scratch arrays sized by maxRowNumEnt and passes the result to
// combineGlobalValuesRaw.  Rows whose byte count is zero are
// special-cased in both loops.
//
// Inconsistent sizes are reported as std::logic_error.
7080 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7082 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7083 unpackAndCombineImplNonStatic(
7084 const Kokkos::DualView<
const local_ordinal_type*,
7085 buffer_device_type>& importLIDs,
7086 Kokkos::DualView<char*, buffer_device_type> imports,
7087 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7088 const size_t constantNumPackets,
7092 using Kokkos::subview;
7093 using Kokkos::MemoryUnmanaged;
7094 using Details::Behavior;
7097 using Details::PackTraits;
7098 using Details::ScalarViewTraits;
7100 using LO = LocalOrdinal;
7101 using GO = GlobalOrdinal;
7102 using ST = impl_scalar_type;
7103 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7105 typename View<int*, device_type>::HostMirror::execution_space;
7106 using pair_type = std::pair<typename View<int*, HES>::size_type,
7107 typename View<int*, HES>::size_type>;
7108 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7109 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7110 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7112 const bool debug = Behavior::debug(
"CrsMatrix");
7113 const bool verbose = Behavior::verbose(
"CrsMatrix");
7114 std::unique_ptr<std::string> prefix;
7116 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7117 std::ostringstream os;
7118 os << *prefix << endl;
7119 std::cerr << os.str ();
// C-string form of the prefix for the per-row combine call; null unless
// verbose output is enabled.
7121 const char*
const prefix_raw =
7122 verbose ? prefix.get()->c_str() :
nullptr;
7124 const size_type numImportLIDs = importLIDs.extent (0);
// ZERO mode and an empty import list are special-cased up front.
7125 if (combineMode ==
ZERO || numImportLIDs == 0) {
7129 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7130 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
// The host views are read below, so bring them up to date first.
7135 if (imports.need_sync_host()) {
7136 imports.sync_host ();
7138 auto imports_h = imports.view_host();
7141 if (numPacketsPerLID.need_sync_host()) {
7142 numPacketsPerLID.sync_host ();
7144 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
// importLIDs is const and cannot be synced here, so it must already be
// current on host.
7146 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7147 auto importLIDs_h = importLIDs.view_host();
7149 size_t numBytesPerValue;
7160 numBytesPerValue = PackTraits<ST>::packValueCount (val);
// First loop: validate each row's packed size and find the largest
// entry count.
7165 size_t maxRowNumEnt = 0;
7166 for (size_type i = 0; i < numImportLIDs; ++i) {
7167 const size_t numBytes = numPacketsPerLID_h[i];
7168 if (numBytes == 0) {
// The row's bytes must lie entirely inside the imports buffer.
7173 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7174 (offset + numBytes >
size_t(imports_h.extent (0)),
7175 std::logic_error,
": At local row index importLIDs_h[i="
7176 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7177 <<
") + numBytes (=" << numBytes <<
") > "
7178 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7183 const size_t theNumBytes =
7185 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7186 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7187 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7189 const char*
const inBuf = imports_h.data () + offset;
7190 const size_t actualNumBytes =
7194 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7195 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7196 <<
", actualNumBytes=" << actualNumBytes
7197 <<
" > numBytes=" << numBytes <<
".");
// A row that has bytes must also have a nonzero entry count.
7198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7199 (numEntLO == 0, std::logic_error,
": At local row index "
7200 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7201 "the number of entries read from the packed data is "
7202 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7206 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
// Scratch arrays sized for the largest row; each row uses a leading
// subview of them.
7214 View<GO*, HES> gblColInds;
7215 View<LO*, HES> lclColInds;
7216 View<ST*, HES> vals;
7229 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7230 gid, maxRowNumEnt,
"gids");
7231 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7232 lid, maxRowNumEnt,
"lids");
7233 vals = ScalarViewTraits<ST, HES>::allocateArray(
7234 val, maxRowNumEnt,
"vals");
// Second loop: unpack each row and combine it into the matrix.
7238 for (size_type i = 0; i < numImportLIDs; ++i) {
7239 const size_t numBytes = numPacketsPerLID_h[i];
7240 if (numBytes == 0) {
7244 const char*
const inBuf = imports_h.data () + offset;
7247 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7248 const LO lclRow = importLIDs_h[i];
7250 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7251 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
// unpackRow returns the number of bytes it consumed, which must equal
// this row's packed size.
7253 const size_t numBytesOut =
7254 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7255 offset, numBytes, numEnt, numBytesPerValue);
7256 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7257 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7258 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7259 << numBytesOut <<
".");
7261 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7262 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7263 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7264 combineMode, prefix_raw, debug, verbose);
7270 std::ostringstream os;
7271 os << *prefix <<
"Done" << endl;
7272 std::cerr << os.str();
// Produce a MultiVector over the column Map, reusing the cached
// importMV_ when possible.
//
// Throws std::runtime_error when the matrix has no column Map or when
// its graph is not fill complete.
7276 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7277 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7280 const bool force)
const
7282 using Teuchos::null;
7286 TEUCHOS_TEST_FOR_EXCEPTION(
7287 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7288 "MapMultiVector: You may only call this method if the matrix has a "
7289 "column Map. If the matrix does not yet have a column Map, you should "
7290 "first call fillComplete (with domain and range Map if necessary).");
7294 TEUCHOS_TEST_FOR_EXCEPTION(
7295 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7296 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7297 "this matrix's graph is fill complete.");
7300 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7301 RCP<const map_type> colMap = this->getColMap ();
// A column-Map vector is prepared only when the graph has an importer
// or the caller forces it.
7314 if (! importer.is_null () || force) {
// The cache is replaced when it is empty or holds a different number of
// columns; otherwise the cached vector is handed out.
7315 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7316 X_colMap = rcp (
new MV (colMap, numVecs));
7319 importMV_ = X_colMap;
7322 X_colMap = importMV_;
// Produce a MultiVector over the row Map, reusing the cached exportMV_
// when possible.
//
// Throws std::runtime_error when the matrix's graph is not fill
// complete.
7333 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7334 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7337 const bool force)
const
7339 using Teuchos::null;
7345 TEUCHOS_TEST_FOR_EXCEPTION(
7346 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7347 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7348 "matrix's graph is fill complete.");
7351 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7355 RCP<const map_type> rowMap = this->getRowMap ();
// A row-Map vector is prepared only when the graph has an exporter or
// the caller forces it.
7367 if (! exporter.is_null () || force) {
// The cache is replaced when it is empty or holds a different number of
// columns; otherwise the cached vector is handed out.
7368 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7369 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7370 exportMV_ = Y_rowMap;
7373 Y_rowMap = exportMV_;
// Forward removeEmptyProcessesInPlace(newMap) to the matrix's own graph,
// then refresh the members that depend on it.
//
// Throws std::logic_error when myGraph_ is null, which the message
// attributes to a matrix created with a constant graph.
7379 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7384 TEUCHOS_TEST_FOR_EXCEPTION(
7385 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7386 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7387 "was created with a constant graph (that is, when it was created using "
7388 "the version of its constructor that takes an RCP<const CrsGraph>). "
7389 "This is because the matrix is not allowed to modify the graph in that "
7390 "case, but removing empty processes requires modifying the graph.");
7391 myGraph_->removeEmptyProcessesInPlace (newMap);
// Re-read the row Map after the graph has been updated.
7395 this->map_ = this->getRowMap ();
// Point the const-graph handle at the same (modified) graph.
7399 staticGraph_ = Teuchos::rcp_const_cast<
const Graph> (myGraph_);
// Build a new matrix C from the input matrix A and this matrix (named B
// below); the verbose message describes it as C = alpha*A + beta*B.
//
// domainMap / rangeMap may be null, in which case B's Map is used, and
// A's Map if B's is null as well.  params, when nonnull, is read for
// "Call fillComplete", "Constructor parameters" and
// "fillComplete parameters".
//
// Throws std::invalid_argument when the Maps are missing or do not
// match, and std::logic_error if C was never created.
7402 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7403 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7408 const Teuchos::RCP<const map_type>& domainMap,
7409 const Teuchos::RCP<const map_type>& rangeMap,
7410 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7412 using Teuchos::Array;
7413 using Teuchos::ArrayView;
7414 using Teuchos::ParameterList;
7417 using Teuchos::rcp_implicit_cast;
7418 using Teuchos::sublist;
7422 using crs_matrix_type =
7424 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7428 std::unique_ptr<std::string> prefix;
7430 prefix = this->createPrefix(
"CrsMatrix",
"add");
7431 std::ostringstream os;
7432 os << *prefix <<
"Start" << endl;
7433 std::cerr << os.str ();
// B is this matrix; A is the other operand.
7436 const crs_matrix_type& B = *
this;
7437 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7438 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7445 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7446 RCP<const map_type> B_domainMap = B.getDomainMap ();
7447 RCP<const map_type> B_rangeMap = B.getRangeMap ();
// Start from the caller's Maps and fall back to B's, then A's, when
// they are null.
7449 RCP<const map_type> theDomainMap = domainMap;
7450 RCP<const map_type> theRangeMap = rangeMap;
7452 if (domainMap.is_null ()) {
7453 if (B_domainMap.is_null ()) {
7454 TEUCHOS_TEST_FOR_EXCEPTION(
7455 A_domainMap.is_null (), std::invalid_argument,
7456 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7457 "then you must supply a nonnull domain Map to this method.");
7458 theDomainMap = A_domainMap;
7460 theDomainMap = B_domainMap;
7463 if (rangeMap.is_null ()) {
7464 if (B_rangeMap.is_null ()) {
7465 TEUCHOS_TEST_FOR_EXCEPTION(
7466 A_rangeMap.is_null (), std::invalid_argument,
7467 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7468 "then you must supply a nonnull range Map to this method.");
7469 theRangeMap = A_rangeMap;
7471 theRangeMap = B_rangeMap;
// Compatibility checks.  When both A and B have domain and range Maps,
// those Maps must be the same, and any Map the caller supplied must be
// the same as B's.
7479 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7480 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7481 TEUCHOS_TEST_FOR_EXCEPTION
7482 (! B_domainMap->isSameAs(*A_domainMap),
7483 std::invalid_argument,
7484 errPfx <<
"The input RowMatrix A must have a domain Map "
7485 "which is the same as (isSameAs) this RowMatrix's "
7487 TEUCHOS_TEST_FOR_EXCEPTION
7488 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7489 errPfx <<
"The input RowMatrix A must have a range Map "
7490 "which is the same as (isSameAs) this RowMatrix's range "
7492 TEUCHOS_TEST_FOR_EXCEPTION
7493 (! domainMap.is_null() &&
7494 ! domainMap->isSameAs(*B_domainMap),
7495 std::invalid_argument,
7496 errPfx <<
"The input domain Map must be the same as "
7497 "(isSameAs) this RowMatrix's domain Map.");
7498 TEUCHOS_TEST_FOR_EXCEPTION
7499 (! rangeMap.is_null() &&
7500 ! rangeMap->isSameAs(*B_rangeMap),
7501 std::invalid_argument,
7502 errPfx <<
"The input range Map must be the same as "
7503 "(isSameAs) this RowMatrix's range Map.");
// Only B has both Maps: the caller's Maps, if given, must match B's.
7506 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7507 TEUCHOS_TEST_FOR_EXCEPTION
7508 (! domainMap.is_null() &&
7509 ! domainMap->isSameAs(*B_domainMap),
7510 std::invalid_argument,
7511 errPfx <<
"The input domain Map must be the same as "
7512 "(isSameAs) this RowMatrix's domain Map.");
7513 TEUCHOS_TEST_FOR_EXCEPTION
7514 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7515 std::invalid_argument,
7516 errPfx <<
"The input range Map must be the same as "
7517 "(isSameAs) this RowMatrix's range Map.");
// Remaining case: the caller must supply both Maps.
7520 TEUCHOS_TEST_FOR_EXCEPTION
7521 (domainMap.is_null() || rangeMap.is_null(),
7522 std::invalid_argument, errPfx <<
"If neither A nor B "
7523 "have a domain and range Map, then you must supply a "
7524 "nonnull domain and range Map to this method.");
// Options taken from params; fillComplete is called on C by default.
7531 bool callFillComplete =
true;
7532 RCP<ParameterList> constructorSublist;
7533 RCP<ParameterList> fillCompleteSublist;
7534 if (! params.is_null()) {
7536 params->get(
"Call fillComplete", callFillComplete);
7537 constructorSublist = sublist(params,
"Constructor parameters");
7538 fillCompleteSublist = sublist(params,
"fillComplete parameters");
// C takes B's row Map.
7541 RCP<const map_type> A_rowMap = A.
getRowMap ();
7542 RCP<const map_type> B_rowMap = B.getRowMap ();
7543 RCP<const map_type> C_rowMap = B_rowMap;
7544 RCP<crs_matrix_type> C;
// With matching row Maps, each row of C is given room for the entries
// of both operands.  A's counts are added only when alpha is nonzero.
7550 if (A_rowMap->isSameAs (*B_rowMap)) {
7551 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7552 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7555 if (alpha != ZERO) {
7556 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7558 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7563 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7564 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7565 C_maxNumEntriesPerRow[localRow] += B_numEntries;
// The constructor sublist is passed along only when one was provided.
7569 if (constructorSublist.is_null ()) {
7570 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7572 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7573 constructorSublist));
7584 TEUCHOS_TEST_FOR_EXCEPTION
7585 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7586 "be the same for statically allocated matrices, to ensure "
7587 "that there is sufficient space to do the addition.");
7590 TEUCHOS_TEST_FOR_EXCEPTION
7591 (C.is_null (), std::logic_error,
7592 errPfx <<
"C should not be null at this point. "
7593 "Please report this bug to the Tpetra developers.");
7596 std::ostringstream os;
7597 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7598 std::cerr << os.str ();
7600 using gids_type = nonconst_global_inds_host_view_type;
7601 using vals_type = nonconst_values_host_view_type;
// A's contribution: skipped entirely when alpha is zero.  Each row's
// values are scaled by alpha and inserted into C.
7605 if (alpha != ZERO) {
7606 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7607 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7609 const GO globalRow = A_rowMap->getGlobalElement (localRow);
// The ind / val scratch views grow on demand and are reused across rows.
7610 if (A_numEntries > static_cast<size_t> (ind.size ())) {
7611 Kokkos::resize(ind,A_numEntries);
7612 Kokkos::resize(val,A_numEntries);
7614 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7615 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7619 for (
size_t k = 0; k < A_numEntries; ++k) {
7620 valView[k] *= alpha;
7623 C->insertGlobalValues (globalRow, A_numEntries,
7624 reinterpret_cast<Scalar *>(valView.data()),
// B's contribution: copy each row of B and insert it into C.
7630 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7631 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7632 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7633 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7634 if (B_numEntries > static_cast<size_t> (ind.size ())) {
7635 Kokkos::resize(ind,B_numEntries);
7636 Kokkos::resize(val,B_numEntries);
7638 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7639 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7640 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7643 for (
size_t k = 0; k < B_numEntries; ++k) {
7647 C->insertGlobalValues (globalRow, B_numEntries,
7648 reinterpret_cast<Scalar *>(valView.data()),
// Finish C with the chosen domain and range Maps unless the caller
// turned fillComplete off.
7653 if (callFillComplete) {
7655 std::ostringstream os;
7656 os << *prefix <<
"Call fillComplete on C" << endl;
7657 std::cerr << os.str ();
7659 if (fillCompleteSublist.is_null ()) {
7660 C->fillComplete (theDomainMap, theRangeMap);
7662 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7666 std::ostringstream os;
7667 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7668 std::cerr << os.str ();
7672 std::ostringstream os;
7673 os << *prefix <<
"Done" << endl;
7674 std::cerr << os.str ();
7681 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7685 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7686 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7687 const Teuchos::RCP<const map_type>& domainMap,
7688 const Teuchos::RCP<const map_type>& rangeMap,
7689 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7696 using Teuchos::ArrayRCP;
7697 using Teuchos::ArrayView;
7698 using Teuchos::Comm;
7699 using Teuchos::ParameterList;
7702 typedef LocalOrdinal LO;
7703 typedef GlobalOrdinal GO;
7704 typedef node_type NT;
7709 const bool debug = Behavior::debug(
"CrsMatrix");
7710 const bool verbose = Behavior::verbose(
"CrsMatrix");
7711 int MyPID = getComm ()->getRank ();
7713 std::unique_ptr<std::string> verbosePrefix;
7716 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7717 std::ostringstream os;
7718 os <<
"Start" << endl;
7719 std::cerr << os.str();
7726 bool reverseMode =
false;
7727 bool restrictComm =
false;
7729 int mm_optimization_core_count =
7730 Behavior::TAFC_OptimizationCoreCount();
7731 RCP<ParameterList> matrixparams;
7732 bool overrideAllreduce =
false;
7733 bool useKokkosPath =
false;
7734 if (! params.is_null ()) {
7735 matrixparams = sublist (params,
"CrsMatrix");
7736 reverseMode = params->get (
"Reverse Mode", reverseMode);
7737 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7738 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7739 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7740 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7741 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7743 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7744 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7745 if(reverseMode) isMM =
false;
7749 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7751 int reduced_mismatch = 0;
7752 if (isMM && !overrideAllreduce) {
7755 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7756 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7757 rowTransfer.getRemoteLIDs ().size() == 0);
7758 mismatch = (source_vals != target_vals) ? 1 : 0;
7761 Teuchos::REDUCE_MAX, * (getComm ()));
7764 #ifdef HAVE_TPETRA_MMM_TIMINGS
7765 using Teuchos::TimeMonitor;
7767 if(!params.is_null())
7768 label = params->get(
"Timer Label",label);
7769 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7772 std::ostringstream os;
7773 if(isMM) os<<
":MMOpt";
7774 else os<<
":MMLegacy";
7778 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7786 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7787 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7788 TEUCHOS_TEST_FOR_EXCEPTION(
7789 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7790 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7791 "argument must be either an Import or an Export, and its template "
7792 "parameters must match the corresponding template parameters of the "
7800 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type> (domainTransfer);
7801 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type> (domainTransfer);
7803 if(! domainTransfer.is_null()) {
7804 TEUCHOS_TEST_FOR_EXCEPTION(
7805 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7806 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7807 "argument must be either an Import or an Export, and its template "
7808 "parameters must match the corresponding template parameters of the "
7811 TEUCHOS_TEST_FOR_EXCEPTION(
7812 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7813 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7814 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7815 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7816 "arguments must be of the same type (either Import or Export).");
7818 TEUCHOS_TEST_FOR_EXCEPTION(
7819 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7820 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7821 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7822 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7823 "arguments must be of the same type (either Import or Export).");
7829 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7833 RCP<const map_type> MyRowMap = reverseMode ?
7834 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7835 RCP<const map_type> MyColMap;
7836 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7837 domainMap : getDomainMap ();
7838 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7839 rangeMap : getRangeMap ();
7840 RCP<const map_type> BaseRowMap = MyRowMap;
7841 RCP<const map_type> BaseDomainMap = MyDomainMap;
7849 if (! destMat.is_null ()) {
7860 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7861 ! destMat->getGraph ()->isGloballyIndexed ();
7862 TEUCHOS_TEST_FOR_EXCEPTION(
7863 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7864 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7865 "to be nonnull, if its graph is empty (neither locally nor globally "
7874 TEUCHOS_TEST_FOR_EXCEPTION(
7875 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7876 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7877 "input argument 'destMat' is not the same as the (row) Map specified "
7878 "by the input argument 'rowTransfer'.");
7879 TEUCHOS_TEST_FOR_EXCEPTION(
7880 ! destMat->checkSizes (*
this), std::invalid_argument,
7881 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7882 "destination matrix, but checkSizes() indicates that it is not a legal "
7883 "legal target for redistribution from the source matrix (*this). This "
7884 "may mean that they do not have the same dimensions.");
7898 TEUCHOS_TEST_FOR_EXCEPTION(
7899 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7900 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7901 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7902 TEUCHOS_TEST_FOR_EXCEPTION(
7903 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7904 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7905 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7908 TEUCHOS_TEST_FOR_EXCEPTION(
7909 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7910 std::invalid_argument,
7911 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7912 "argument must be the same as the rebalanced domain map 'domainMap'");
7914 TEUCHOS_TEST_FOR_EXCEPTION(
7915 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7916 std::invalid_argument,
7917 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7918 "argument must be the same as the rebalanced domain map 'domainMap'");
7931 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7932 ArrayView<const LO> ExportLIDs = reverseMode ?
7933 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7934 auto RemoteLIDs = reverseMode ?
7935 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7936 auto PermuteToLIDs = reverseMode ?
7937 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7938 auto PermuteFromLIDs = reverseMode ?
7939 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7940 Distributor& Distor = rowTransfer.getDistributor ();
7943 Teuchos::Array<int> SourcePids;
7946 RCP<const map_type> ReducedRowMap, ReducedColMap,
7947 ReducedDomainMap, ReducedRangeMap;
7948 RCP<const Comm<int> > ReducedComm;
7952 if (destMat.is_null ()) {
7953 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7960 #ifdef HAVE_TPETRA_MMM_TIMINGS
7961 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7963 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7964 ReducedComm = ReducedRowMap.is_null () ?
7966 ReducedRowMap->getComm ();
7967 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7969 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7971 MyDomainMap->replaceCommWithSubset (ReducedComm);
7972 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7974 MyRangeMap->replaceCommWithSubset (ReducedComm);
7977 MyRowMap = ReducedRowMap;
7978 MyDomainMap = ReducedDomainMap;
7979 MyRangeMap = ReducedRangeMap;
7982 if (! ReducedComm.is_null ()) {
7983 MyPID = ReducedComm->getRank ();
7990 ReducedComm = MyRowMap->getComm ();
7999 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8002 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8004 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8005 #ifdef HAVE_TPETRA_MMM_TIMINGS
8006 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8014 Import_Util::getPids (*MyImporter, SourcePids,
false);
8016 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8019 #ifdef HAVE_TPETRA_MMM_TIMINGS
8020 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8022 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8023 IntVectorType SourceCol_pids(getColMap());
8025 SourceDomain_pids.putScalar(MyPID);
8027 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8028 SourcePids.resize (getColMap ()->getLocalNumElements ());
8029 SourceCol_pids.get1dCopy (SourcePids ());
8031 else if (MyImporter.is_null ()) {
8033 #ifdef HAVE_TPETRA_MMM_TIMINGS
8034 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8036 SourcePids.resize (getColMap ()->getLocalNumElements ());
8037 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8039 else if ( ! MyImporter.is_null () &&
8040 ! domainTransfer.is_null () ) {
8045 #ifdef HAVE_TPETRA_MMM_TIMINGS
8046 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8050 IntVectorType TargetDomain_pids (domainMap);
8051 TargetDomain_pids.putScalar (MyPID);
8054 IntVectorType SourceDomain_pids (getDomainMap ());
8057 IntVectorType SourceCol_pids (getColMap ());
8059 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8060 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8062 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8063 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8065 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8066 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8068 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8069 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8072 TEUCHOS_TEST_FOR_EXCEPTION(
8073 true, std::logic_error,
"Tpetra::CrsMatrix::"
8074 "transferAndFillComplete: Should never get here! "
8075 "Please report this bug to a Tpetra developer.");
8077 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8078 SourcePids.resize (getColMap ()->getLocalNumElements ());
8079 SourceCol_pids.get1dCopy (SourcePids ());
8081 else if ( ! MyImporter.is_null () &&
8082 BaseDomainMap->isSameAs (*BaseRowMap) &&
8083 getDomainMap ()->isSameAs (*getRowMap ())) {
8085 #ifdef HAVE_TPETRA_MMM_TIMINGS
8086 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8089 IntVectorType TargetRow_pids (domainMap);
8090 IntVectorType SourceRow_pids (getRowMap ());
8091 IntVectorType SourceCol_pids (getColMap ());
8093 TargetRow_pids.putScalar (MyPID);
8094 if (! reverseMode && xferAsImport !=
nullptr) {
8095 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8097 else if (reverseMode && xferAsExport !=
nullptr) {
8098 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8100 else if (! reverseMode && xferAsExport !=
nullptr) {
8101 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8103 else if (reverseMode && xferAsImport !=
nullptr) {
8104 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8107 TEUCHOS_TEST_FOR_EXCEPTION(
8108 true, std::logic_error,
"Tpetra::CrsMatrix::"
8109 "transferAndFillComplete: Should never get here! "
8110 "Please report this bug to a Tpetra developer.");
8113 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8114 SourcePids.resize (getColMap ()->getLocalNumElements ());
8115 SourceCol_pids.get1dCopy (SourcePids ());
8118 TEUCHOS_TEST_FOR_EXCEPTION(
8119 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8120 "transferAndFillComplete: This method only allows either domainMap == "
8121 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8122 "getDomainMap () == getRowMap ()).");
8126 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8128 #ifdef HAVE_TPETRA_MMM_TIMINGS
8129 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8131 if (constantNumPackets == 0) {
8132 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8133 RemoteLIDs.view_host().size ());
8140 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8141 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8147 #ifdef HAVE_TPETRA_MMM_TIMINGS
8148 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8151 using Teuchos::outArg;
8152 using Teuchos::REDUCE_MAX;
8153 using Teuchos::reduceAll;
8156 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8157 const int myRank = comm->getRank ();
8159 std::ostringstream errStrm;
8163 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8166 destMat->numExportPacketsPerLID_.modify_host ();
8167 numExportPacketsPerLID =
8170 catch (std::exception& e) {
8171 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8172 << e.what () << std::endl;
8176 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8177 "an exception not a subclass of std::exception" << std::endl;
8181 if (! comm.is_null ()) {
8182 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8185 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8186 TEUCHOS_TEST_FOR_EXCEPTION(
8187 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8188 "exception on at least one process.");
8192 std::ostringstream os;
8193 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8195 std::cerr << os.str ();
8200 numExportPacketsPerLID,
8203 constantNumPackets);
8205 catch (std::exception& e) {
8206 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8207 << e.what () << std::endl;
8211 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8212 "an exception not a subclass of std::exception" << std::endl;
8217 std::ostringstream os;
8218 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8220 std::cerr << os.str ();
8223 if (! comm.is_null ()) {
8224 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8227 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8228 TEUCHOS_TEST_FOR_EXCEPTION(
8229 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8230 "exception on at least one process.");
8235 destMat->numExportPacketsPerLID_.modify_host ();
8236 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8239 std::ostringstream os;
8240 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8242 std::cerr << os.str ();
8246 numExportPacketsPerLID,
8249 constantNumPackets);
8251 std::ostringstream os;
8252 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8254 std::cerr << os.str ();
8261 #ifdef HAVE_TPETRA_MMM_TIMINGS
8262 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8264 if (! communication_needed) {
8266 std::ostringstream os;
8267 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8268 std::cerr << os.str ();
8273 if (constantNumPackets == 0) {
8275 std::ostringstream os;
8276 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8278 std::cerr << os.str ();
8283 destMat->numExportPacketsPerLID_.sync_host ();
8284 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8286 destMat->numImportPacketsPerLID_.sync_host ();
8287 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8291 std::ostringstream os;
8292 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8294 std::cerr << os.str ();
8296 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8297 destMat->numImportPacketsPerLID_.view_host());
8299 std::ostringstream os;
8300 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8302 std::cerr << os.str ();
8305 size_t totalImportPackets = 0;
8306 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8307 totalImportPackets += numImportPacketsPerLID[i];
8312 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8313 verbosePrefix.get ());
8314 destMat->imports_.modify_host ();
8315 auto hostImports = destMat->imports_.view_host();
8318 destMat->exports_.sync_host ();
8319 auto hostExports = destMat->exports_.view_host();
8321 std::ostringstream os;
8322 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8324 std::cerr << os.str ();
8326 Distor.doReversePostsAndWaits (hostExports,
8327 numExportPacketsPerLID,
8329 numImportPacketsPerLID);
8331 std::ostringstream os;
8332 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8334 std::cerr << os.str ();
8339 std::ostringstream os;
8340 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8342 std::cerr << os.str ();
8344 destMat->imports_.modify_host ();
8345 auto hostImports = destMat->imports_.view_host();
8348 destMat->exports_.sync_host ();
8349 auto hostExports = destMat->exports_.view_host();
8351 std::ostringstream os;
8352 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8354 std::cerr << os.str ();
8356 Distor.doReversePostsAndWaits (hostExports,
8360 std::ostringstream os;
8361 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8363 std::cerr << os.str ();
8368 if (constantNumPackets == 0) {
8370 std::ostringstream os;
8371 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8373 std::cerr << os.str ();
8378 destMat->numExportPacketsPerLID_.sync_host ();
8379 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8381 destMat->numImportPacketsPerLID_.sync_host ();
8382 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8385 std::ostringstream os;
8386 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8388 std::cerr << os.str ();
8390 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8391 destMat->numImportPacketsPerLID_.view_host());
8393 std::ostringstream os;
8394 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8396 std::cerr << os.str ();
8399 size_t totalImportPackets = 0;
8400 for (
Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8401 totalImportPackets += numImportPacketsPerLID[i];
8406 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8407 verbosePrefix.get ());
8408 destMat->imports_.modify_host ();
8409 auto hostImports = destMat->imports_.view_host();
8412 destMat->exports_.sync_host ();
8413 auto hostExports = destMat->exports_.view_host();
8415 std::ostringstream os;
8416 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8418 std::cerr << os.str ();
8420 Distor.doPostsAndWaits (hostExports,
8421 numExportPacketsPerLID,
8423 numImportPacketsPerLID);
8425 std::ostringstream os;
8426 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8428 std::cerr << os.str ();
8433 std::ostringstream os;
8434 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8436 std::cerr << os.str ();
8438 destMat->imports_.modify_host ();
8439 auto hostImports = destMat->imports_.view_host();
8442 destMat->exports_.sync_host ();
8443 auto hostExports = destMat->exports_.view_host();
8445 std::ostringstream os;
8446 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8448 std::cerr << os.str ();
8450 Distor.doPostsAndWaits (hostExports,
8454 std::ostringstream os;
8455 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8457 std::cerr << os.str ();
8468 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8470 Teuchos::Array<int> RemotePids;
8472 Teuchos::Array<int> TargetPids;
8478 destMat->numImportPacketsPerLID_.modify_host();
8480 # ifdef HAVE_TPETRA_MMM_TIMINGS
8481 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8483 ArrayRCP<size_t> CSR_rowptr;
8484 ArrayRCP<GO> CSR_colind_GID;
8485 ArrayRCP<LO> CSR_colind_LID;
8486 ArrayRCP<Scalar> CSR_vals;
8488 destMat->imports_.sync_device ();
8489 destMat->numImportPacketsPerLID_.sync_device ();
8491 size_t N = BaseRowMap->getLocalNumElements ();
8493 auto RemoteLIDs_d = RemoteLIDs.view_device();
8494 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8495 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8500 destMat->imports_.view_device(),
8501 destMat->numImportPacketsPerLID_.view_device(),
8515 if (
typeid (LO) ==
typeid (GO)) {
8516 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8519 CSR_colind_LID.resize (CSR_colind_GID.size());
8521 CSR_colind_LID.resize (CSR_colind_GID.size());
8526 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8528 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8530 #ifdef HAVE_TPETRA_MMM_TIMINGS
8531 tmCopySPRdata = Teuchos::null;
8540 std::ostringstream os;
8541 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8543 std::cerr << os.str ();
8546 #ifdef HAVE_TPETRA_MMM_TIMINGS
8547 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8549 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8559 std::ostringstream os;
8560 os << *verbosePrefix <<
"restrictComm="
8561 << (restrictComm ?
"true" :
"false") << std::endl;
8562 std::cerr << os.str ();
8569 #ifdef HAVE_TPETRA_MMM_TIMINGS
8570 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8573 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8575 MyColMap->replaceCommWithSubset (ReducedComm);
8576 MyColMap = ReducedColMap;
8581 std::ostringstream os;
8582 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8583 std::cerr << os.str ();
8585 destMat->replaceColMap (MyColMap);
8592 if (ReducedComm.is_null ()) {
8594 std::ostringstream os;
8595 os << *verbosePrefix <<
"I am no longer in the communicator; "
8596 "returning" << std::endl;
8597 std::cerr << os.str ();
8606 if ((! reverseMode && xferAsImport !=
nullptr) ||
8607 (reverseMode && xferAsExport !=
nullptr)) {
8609 std::ostringstream os;
8610 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8611 std::cerr << os.str ();
8613 #ifdef HAVE_TPETRA_MMM_TIMINGS
8614 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8616 Import_Util::sortCrsEntries (CSR_rowptr(),
8620 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8621 (reverseMode && xferAsImport !=
nullptr)) {
8623 std::ostringstream os;
8624 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8626 std::cerr << os.str();
8628 #ifdef HAVE_TPETRA_MMM_TIMINGS
8629 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8631 Import_Util::sortAndMergeCrsEntries (CSR_rowptr(),
8634 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8635 CSR_colind_LID.resize (CSR_rowptr[N]);
8636 CSR_vals.resize (CSR_rowptr[N]);
8640 TEUCHOS_TEST_FOR_EXCEPTION(
8641 true, std::logic_error,
"Tpetra::CrsMatrix::"
8642 "transferAndFillComplete: Should never get here! "
8643 "Please report this bug to a Tpetra developer.");
8650 std::ostringstream os;
8651 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8652 std::cerr << os.str ();
8661 #ifdef HAVE_TPETRA_MMM_TIMINGS
8662 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8664 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8676 destMat->numImportPacketsPerLID_.modify_host();
8678 # ifdef HAVE_TPETRA_MMM_TIMINGS
8679 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8681 ArrayRCP<size_t> CSR_rowptr;
8682 ArrayRCP<GO> CSR_colind_GID;
8683 ArrayRCP<LO> CSR_colind_LID;
8684 ArrayRCP<Scalar> CSR_vals;
8686 destMat->imports_.sync_device ();
8687 destMat->numImportPacketsPerLID_.sync_device ();
8689 size_t N = BaseRowMap->getLocalNumElements ();
8691 auto RemoteLIDs_d = RemoteLIDs.view_device();
8692 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8693 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8695 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8696 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8697 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8698 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8699 Kokkos::View<int*,device_type> TargetPids_d;
8704 destMat->imports_.view_device(),
8705 destMat->numImportPacketsPerLID_.view_device(),
8717 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8719 #ifdef HAVE_TPETRA_MMM_TIMINGS
8720 tmCopySPRdata = Teuchos::null;
8729 std::ostringstream os;
8730 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8732 std::cerr << os.str ();
8735 #ifdef HAVE_TPETRA_MMM_TIMINGS
8736 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8738 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8748 std::ostringstream os;
8749 os << *verbosePrefix <<
"restrictComm="
8750 << (restrictComm ?
"true" :
"false") << std::endl;
8751 std::cerr << os.str ();
8758 #ifdef HAVE_TPETRA_MMM_TIMINGS
8759 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8762 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8764 MyColMap->replaceCommWithSubset (ReducedComm);
8765 MyColMap = ReducedColMap;
8770 std::ostringstream os;
8771 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8772 std::cerr << os.str ();
8774 destMat->replaceColMap (MyColMap);
8781 if (ReducedComm.is_null ()) {
8783 std::ostringstream os;
8784 os << *verbosePrefix <<
"I am no longer in the communicator; "
8785 "returning" << std::endl;
8786 std::cerr << os.str ();
8796 if ((! reverseMode && xferAsImport !=
nullptr) ||
8797 (reverseMode && xferAsExport !=
nullptr)) {
8799 std::ostringstream os;
8800 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8801 std::cerr << os.str ();
8803 #ifdef HAVE_TPETRA_MMM_TIMINGS
8804 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8806 Import_Util::sortCrsEntries (CSR_rowptr_d,
8810 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8811 (reverseMode && xferAsImport !=
nullptr)) {
8813 std::ostringstream os;
8814 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8816 std::cerr << os.str();
8818 #ifdef HAVE_TPETRA_MMM_TIMINGS
8819 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8821 Import_Util::sortAndMergeCrsEntries (CSR_rowptr_d,
8826 TEUCHOS_TEST_FOR_EXCEPTION(
8827 true, std::logic_error,
"Tpetra::CrsMatrix::"
8828 "transferAndFillComplete: Should never get here! "
8829 "Please report this bug to a Tpetra developer.");
8837 std::ostringstream os;
8838 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8839 std::cerr << os.str ();
8843 #ifdef HAVE_TPETRA_MMM_TIMINGS
8844 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8846 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8854 #ifdef HAVE_TPETRA_MMM_TIMINGS
8855 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8858 Teuchos::ParameterList esfc_params;
8860 RCP<import_type> MyImport;
8863 if (iallreduceRequest.get () !=
nullptr) {
8865 std::ostringstream os;
8866 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8868 std::cerr << os.str ();
8870 iallreduceRequest->wait ();
8871 if (reduced_mismatch != 0) {
8877 #ifdef HAVE_TPETRA_MMM_TIMINGS
8878 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8883 std::ostringstream os;
8884 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8885 std::cerr << os.str ();
8888 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8889 Teuchos::ArrayRCP<int> type3PIDs;
8890 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8891 auto colind = getCrsGraph()->getLocalIndicesHost();
8894 std::ostringstream os;
8895 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8896 std::cerr << os.str ();
8900 #ifdef HAVE_TPETRA_MMM_TIMINGS
8901 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8903 Import_Util::reverseNeighborDiscovery(*
this,
8915 std::ostringstream os;
8916 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8917 std::cerr << os.str ();
8920 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8921 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8923 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8924 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8926 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8928 std::vector<bool> IsOwned(numCols,
true);
8929 std::vector<int> SentTo(numCols,-1);
8930 if (! MyImporter.is_null ()) {
8931 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8932 IsOwned[rlid]=
false;
8936 std::vector<std::pair<int,GO> > usrtg;
8937 usrtg.reserve(TEPID2.size());
8940 const auto& colMap = * (this->getColMap ());
8942 const LO row = TELID2[i];
8943 const int pid = TEPID2[i];
8944 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8945 const int col = colind[j];
8946 if (IsOwned[col] && SentTo[col] != pid) {
8948 GO gid = colMap.getGlobalElement (col);
8949 usrtg.push_back (std::pair<int,GO> (pid, gid));
8957 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8959 usrtg.erase(eopg,usrtg.end());
8962 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8963 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8966 for(
auto && p : usrtg) {
8967 EPID2[pos]= p.first;
8968 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8972 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8973 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8974 GO InfGID = std::numeric_limits<GO>::max();
8975 int InfPID = INT_MAX;
8978 #endif // TPETRA_MIN3
8979 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
8980 int i1=0, i2=0, i3=0;
8981 int Len1 = EPID1.size();
8982 int Len2 = EPID2.size();
8983 int Len3 = EPID3.size();
8985 int MyLen=Len1+Len2+Len3;
8986 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
8987 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
8990 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
8991 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
8992 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
8993 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
8995 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
8996 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
8997 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
8999 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9000 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9003 #endif // TPETRA_MIN3
9004 bool added_entry=
false;
9006 if(PID1 == MIN_PID && GID1 == MIN_GID){
9007 userExportLIDs[iloc]=ELID1[i1];
9008 userExportPIDs[iloc]=EPID1[i1];
9013 if(PID2 == MIN_PID && GID2 == MIN_GID){
9015 userExportLIDs[iloc]=ELID2[i2];
9016 userExportPIDs[iloc]=EPID2[i2];
9022 if(PID3 == MIN_PID && GID3 == MIN_GID){
9024 userExportLIDs[iloc]=ELID3[i3];
9025 userExportPIDs[iloc]=EPID3[i3];
9033 std::ostringstream os;
9034 os << *verbosePrefix <<
"Create Import" << std::endl;
9035 std::cerr << os.str ();
9038 #ifdef HAVE_TPETRA_MMM_TIMINGS
9039 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9041 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9043 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9044 MyImport = rcp (
new import_type (MyDomainMap,
9047 userExportLIDs.view(0,iloc).getConst(),
9048 userExportPIDs.view(0,iloc).getConst(),
9053 std::ostringstream os;
9054 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9055 std::cerr << os.str ();
9059 #ifdef HAVE_TPETRA_MMM_TIMINGS
9060 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9061 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9063 if(!params.is_null())
9064 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9065 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9071 #ifdef HAVE_TPETRA_MMM_TIMINGS
9072 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9075 std::ostringstream os;
9076 os << *verbosePrefix <<
"Create Import" << std::endl;
9077 std::cerr << os.str ();
9080 #ifdef HAVE_TPETRA_MMM_TIMINGS
9081 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9083 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9084 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9085 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9086 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9089 std::ostringstream os;
9090 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9091 std::cerr << os.str ();
9094 #ifdef HAVE_TPETRA_MMM_TIMINGS
9095 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9096 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9098 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9101 if (!params.is_null ()) {
9102 esfc_params.set (
"compute global constants",
9103 params->get (
"compute global constants",
true));
9105 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9106 MyImport, Teuchos::null,
9107 rcp (
new Teuchos::ParameterList (esfc_params)));
9110 #ifdef HAVE_TPETRA_MMM_TIMINGS
9111 tmIESFC = Teuchos::null;
9115 std::ostringstream os;
9116 os << *verbosePrefix <<
"Done" << endl;
9117 std::cerr << os.str ();
// Single-Import convenience wrapper: the one visible statement forwards
// destMatrix, importer, domainMap, rangeMap and params unchanged to
// transferAndFillComplete, passing Teuchos::null in the third argument slot
// (the slot where the two-transfer variant passes its domain importer).
// NOTE(review): original lines 9123-9126 (return type, qualified name and the
// destMatrix/importer parameters) and the brace lines are not visible in this
// listing; presumably this is CrsMatrix::importAndFillComplete - confirm.
9122 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9127 const Teuchos::RCP<const map_type>& domainMap,
9128 const Teuchos::RCP<const map_type>& rangeMap,
9129 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9131 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
// Row-and-domain Import convenience wrapper: forwards destMatrix, rowImporter,
// domainMap, rangeMap and params to transferAndFillComplete. The domain
// importer is wrapped with Teuchos::rcpFromRef before being passed as the
// third argument.
// NOTE(review): original lines 9135-9139 (return type, qualified name and the
// leading parameters) and the brace lines are not visible in this listing;
// presumably this is the two-Import overload of importAndFillComplete - confirm.
9134 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9140 const Teuchos::RCP<const map_type>& domainMap,
9141 const Teuchos::RCP<const map_type>& rangeMap,
9142 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9144 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
// Single-Export convenience wrapper: the one visible statement forwards
// destMatrix, exporter, domainMap, rangeMap and params unchanged to
// transferAndFillComplete, passing Teuchos::null in the third argument slot
// (the slot where the two-transfer variant passes its domain exporter).
// NOTE(review): original lines 9148-9151 (return type, qualified name and the
// destMatrix/exporter parameters) and the brace lines are not visible in this
// listing; presumably this is CrsMatrix::exportAndFillComplete - confirm.
9147 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9152 const Teuchos::RCP<const map_type>& domainMap,
9153 const Teuchos::RCP<const map_type>& rangeMap,
9154 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9156 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
// Row-and-domain Export convenience wrapper: forwards destMatrix, rowExporter,
// domainMap, rangeMap and params to transferAndFillComplete. The domain
// exporter is wrapped with Teuchos::rcpFromRef before being passed as the
// third argument.
// NOTE(review): original lines 9160-9164 (return type, qualified name and the
// leading parameters) and the brace lines are not visible in this listing;
// presumably this is the two-Export overload of exportAndFillComplete - confirm.
9159 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9165 const Teuchos::RCP<const map_type>& domainMap,
9166 const Teuchos::RCP<const map_type>& rangeMap,
9167 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9169 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
// Explicit instantiation of the CrsMatrix class template for one
// (SCALAR, LO, GO, NODE) combination.
9180 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9182 template class CrsMatrix< SCALAR , LO , GO , NODE >;
// Explicit instantiation of the member template convert<SO>() on
// CrsMatrix<SI, LO, GO, NODE>; the instantiated function returns an
// RCP to a CrsMatrix<SO, LO, GO, NODE> (SI = input scalar, SO = output scalar).
9184 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9186 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9187 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
// Emits, for one (SCALAR, LO, GO, NODE) combination, the signature of the
// nonmember importAndFillCompleteCrsMatrix that takes a source matrix, a
// single Import, domain and range Maps, and a ParameterList, and returns an
// RCP to a CrsMatrix. The Import and Map template arguments are spelled via
// the CrsMatrix's own local_ordinal_type / global_ordinal_type / node_type.
// NOTE(review): original line 9190 (between the #define and the return type)
// is not visible in this listing; presumably it carries the `template`
// keyword that makes this an explicit instantiation - confirm.
9189 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9191 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9192 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9193 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9194 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9195 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9196 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9197 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9198 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9199 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9200 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9201 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9202 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Emits, for one (SCALAR, LO, GO, NODE) combination, the signature of the
// nonmember importAndFillCompleteCrsMatrix variant that takes two Import
// objects (rowImporter and domainImporter) in addition to the source matrix,
// domain and range Maps, and a ParameterList.
// NOTE(review): original line 9205 (between the #define and the return type)
// is not visible in this listing; presumably it carries the `template`
// keyword that makes this an explicit instantiation - confirm.
9204 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9206 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9207 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9208 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9209 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9210 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9211 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9212 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9213 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9214 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9215 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9216 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9217 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9218 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9219 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9220 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Emits, for one (SCALAR, LO, GO, NODE) combination, the signature of the
// nonmember exportAndFillCompleteCrsMatrix that takes a source matrix, a
// single Export, domain and range Maps, and a ParameterList, and returns an
// RCP to a CrsMatrix.
// NOTE(review): original line 9224 (between the #define and the return type)
// is not visible in this listing; presumably it carries the `template`
// keyword that makes this an explicit instantiation - confirm.
9223 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9225 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9226 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9227 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9228 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9229 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9230 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9231 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9232 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9233 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9234 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9235 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9236 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Emits, for one (SCALAR, LO, GO, NODE) combination, the signature of the
// nonmember exportAndFillCompleteCrsMatrix variant that takes two Export
// objects (rowExporter and domainExporter) in addition to the source matrix,
// domain and range Maps, and a ParameterList.
// NOTE(review): original line 9239 (between the #define and the return type)
// is not visible in this listing; presumably it carries the `template`
// keyword that makes this an explicit instantiation - confirm.
9238 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9240 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9241 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9242 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9243 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9244 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9245 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9246 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9247 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9248 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9249 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9250 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9251 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9252 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9253 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9254 const Teuchos::RCP<Teuchos::ParameterList>& params);
// Umbrella macro: for one (SCALAR, LO, GO, NODE) combination, expands the
// class instantiation macro followed by the four import/export
// "and fill complete" macros (single-transfer and _TWO variants).
// TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion.
9257 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9258 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9259 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9260 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9261 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9262 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9264 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
mag_type getNormInf() const
Compute and return the infinity norm of the matrix.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
mag_type getNorm1(bool assumeSymmetric=false) const
Compute and return the 1-norm of the matrix.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
Declaration of Tpetra::Details::EquilibrationInfo.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows. Valid when isLocallyIndexed is true. If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A)
Compute global row one-norms ("row sums") of the input sparse matrix A, in a way suitable for one-sid...
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
Details::EquilibrationInfo< typename Kokkos::ArithTraits< SC >::val_type, typename NT::device_type > computeRowAndColumnOneNorms(const Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool assumeSymmetric)
Compute global row and column one-norms ("row sums" and "column sums") of the input sparse matrix A...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror num_row_entries_type
Row offsets for "1-D" storage.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
Declaration of Tpetra::computeRowAndColumnOneNorms.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.