10 #ifndef TPETRA_CRSMATRIX_DEF_HPP
11 #define TPETRA_CRSMATRIX_DEF_HPP
23 #include "Tpetra_RowMatrix.hpp"
24 #include "Tpetra_LocalCrsMatrixOperator.hpp"
32 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
40 #include "Tpetra_Details_packCrsMatrix.hpp"
41 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
43 #include "Teuchos_FancyOStream.hpp"
44 #include "Teuchos_RCP.hpp"
45 #include "Teuchos_DataAccess.hpp"
46 #include "Teuchos_SerialDenseMatrix.hpp"
47 #include "KokkosBlas1_scal.hpp"
48 #include "KokkosSparse_getDiagCopy.hpp"
49 #include "KokkosSparse_spmv.hpp"
50 #include "Kokkos_StdAlgorithms.hpp"
62 template<
class T,
class BinaryFunction>
63 T atomic_binary_function_update (
volatile T*
const dest,
77 T newVal = f (assume, inputVal);
78 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
79 }
while (assume != oldVal);
99 template<
class Scalar>
103 typedef Teuchos::ScalarTraits<Scalar> STS;
104 return std::max (STS::magnitude (x), STS::magnitude (y));
113 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
114 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
115 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
116 size_t maxNumEntriesPerRow,
117 const Teuchos::RCP<Teuchos::ParameterList>& params) :
120 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
121 "[, RCP<ParameterList>]): ";
122 Teuchos::RCP<crs_graph_type> graph;
124 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
127 catch (std::exception& e) {
128 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
129 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
130 "size_t [, RCP<ParameterList>]) threw an exception: "
137 staticGraph_ = myGraph_;
142 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
145 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
146 const Teuchos::RCP<Teuchos::ParameterList>& params) :
149 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
150 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
151 Teuchos::RCP<crs_graph_type> graph;
157 catch (std::exception& e) {
158 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
159 (
true, std::runtime_error,
"CrsGraph constructor "
160 "(RCP<const Map>, ArrayView<const size_t>"
161 "[, RCP<ParameterList>]) threw an exception: "
168 staticGraph_ = graph;
173 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
176 const Teuchos::RCP<const map_type>& colMap,
177 const size_t maxNumEntPerRow,
178 const Teuchos::RCP<Teuchos::ParameterList>& params) :
181 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
182 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
183 const char suffix[] =
184 " Please report this bug to the Tpetra developers.";
187 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
188 (! staticGraph_.is_null (), std::logic_error,
189 "staticGraph_ is not null at the beginning of the constructor."
191 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
192 (! myGraph_.is_null (), std::logic_error,
193 "myGraph_ is not null at the beginning of the constructor."
195 Teuchos::RCP<crs_graph_type> graph;
201 catch (std::exception& e) {
202 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
203 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
204 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
205 "exception: " << e.what ());
211 staticGraph_ = myGraph_;
216 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
219 const Teuchos::RCP<const map_type>& colMap,
220 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
221 const Teuchos::RCP<Teuchos::ParameterList>& params) :
224 const char tfecfFuncName[] =
225 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
226 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
227 Teuchos::RCP<crs_graph_type> graph;
233 catch (std::exception& e) {
234 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
235 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
236 "RCP<const Map>, ArrayView<const size_t>[, "
237 "RCP<ParameterList>]) threw an exception: " << e.what ());
243 staticGraph_ = graph;
249 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
251 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
252 const Teuchos::RCP<Teuchos::ParameterList>& ) :
254 staticGraph_ (graph),
255 storageStatus_ (Details::STORAGE_1D_PACKED)
258 typedef typename local_matrix_device_type::values_type values_type;
259 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
260 "RCP<ParameterList>]): ";
263 std::unique_ptr<std::string> prefix;
265 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
266 std::ostringstream os;
267 os << *prefix <<
"Start" << endl;
268 std::cerr << os.str ();
271 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
272 (graph.is_null (), std::runtime_error,
"Input graph is null.");
273 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
274 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
275 "is not fill complete. You must call fillComplete on the "
276 "graph before using it to construct a CrsMatrix. Note that "
277 "calling resumeFill on the graph makes it not fill complete, "
278 "even if you had previously called fillComplete. In that "
279 "case, you must call fillComplete on the graph again.");
287 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
289 std::ostringstream os;
290 os << *prefix <<
"Allocate values: " << numEnt << endl;
291 std::cerr << os.str ();
294 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
296 valuesUnpacked_wdv = valuesPacked_wdv;
301 std::ostringstream os;
302 os << *prefix <<
"Done" << endl;
303 std::cerr << os.str ();
307 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
310 const Teuchos::RCP<const crs_graph_type>& graph,
311 const Teuchos::RCP<Teuchos::ParameterList>& params) :
313 staticGraph_ (graph),
314 storageStatus_ (matrix.storageStatus_)
316 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
317 "local_matrix_device_type::values_type, "
318 "[,RCP<ParameterList>]): ";
319 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
320 (graph.is_null (), std::runtime_error,
"Input graph is null.");
321 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
322 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
323 "is not fill complete. You must call fillComplete on the "
324 "graph before using it to construct a CrsMatrix. Note that "
325 "calling resumeFill on the graph makes it not fill complete, "
326 "even if you had previously called fillComplete. In that "
327 "case, you must call fillComplete on the graph again.");
329 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
330 valuesPacked_wdv =
values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
332 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
333 valuesUnpacked_wdv =
values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
339 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
341 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
342 const typename local_matrix_device_type::values_type& values,
343 const Teuchos::RCP<Teuchos::ParameterList>& ) :
345 staticGraph_ (graph),
346 storageStatus_ (Details::STORAGE_1D_PACKED)
348 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
349 "local_matrix_device_type::values_type, "
350 "[,RCP<ParameterList>]): ";
351 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
352 (graph.is_null (), std::runtime_error,
"Input graph is null.");
353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
354 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
355 "is not fill complete. You must call fillComplete on the "
356 "graph before using it to construct a CrsMatrix. Note that "
357 "calling resumeFill on the graph makes it not fill complete, "
358 "even if you had previously called fillComplete. In that "
359 "case, you must call fillComplete on the graph again.");
368 valuesUnpacked_wdv = valuesPacked_wdv;
379 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
382 const Teuchos::RCP<const map_type>& colMap,
383 const typename local_graph_device_type::row_map_type& rowPointers,
384 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
385 const typename local_matrix_device_type::values_type& values,
386 const Teuchos::RCP<Teuchos::ParameterList>& params) :
388 storageStatus_ (Details::STORAGE_1D_PACKED)
390 using Details::getEntryOnHost;
393 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
394 "RCP<const Map>, ptr, ind, val[, params]): ";
395 const char suffix[] =
396 ". Please report this bug to the Tpetra developers.";
400 std::unique_ptr<std::string> prefix;
402 prefix = this->createPrefix(
403 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
404 std::ostringstream os;
405 os << *prefix <<
"Start" << endl;
406 std::cerr << os.str ();
413 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
414 (values.extent(0) != columnIndices.extent(0),
415 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
416 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
418 if (debug && rowPointers.extent(0) != 0) {
419 const size_t numEnt =
420 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
421 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
422 (numEnt !=
size_t(columnIndices.extent(0)) ||
423 numEnt !=
size_t(values.extent(0)),
424 std::invalid_argument,
"Last entry of rowPointers says that "
425 "the matrix has " << numEnt <<
" entr"
426 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
427 "columnIndices and values don't match this. "
428 "columnIndices.extent(0)=" << columnIndices.extent (0)
429 <<
" and values.extent(0)=" << values.extent (0) <<
".");
432 RCP<crs_graph_type> graph;
434 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
435 columnIndices, params));
437 catch (std::exception& e) {
438 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
439 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
440 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
448 auto lclGraph = graph->getLocalGraphDevice ();
449 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
450 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
451 lclGraph.entries.extent (0) != columnIndices.extent (0),
452 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
453 "ind[, params]) did not set the local graph correctly." << suffix);
454 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
455 (lclGraph.entries.extent (0) != values.extent (0),
456 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
457 "params]) did not set the local graph correctly. "
458 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
459 <<
" != values.extent(0) = " << values.extent (0) << suffix);
465 staticGraph_ = graph;
475 valuesUnpacked_wdv = valuesPacked_wdv;
484 std::ostringstream os;
485 os << *prefix <<
"Done" << endl;
486 std::cerr << os.str();
490 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
493 const Teuchos::RCP<const map_type>& colMap,
494 const Teuchos::ArrayRCP<size_t>& ptr,
495 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
496 const Teuchos::ArrayRCP<Scalar>& val,
497 const Teuchos::RCP<Teuchos::ParameterList>& params) :
499 storageStatus_ (Details::STORAGE_1D_PACKED)
501 using Kokkos::Compat::getKokkosViewDeepCopy;
502 using Teuchos::av_reinterpret_cast;
504 using values_type =
typename local_matrix_device_type::values_type;
506 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
507 "RCP<const Map>, ptr, ind, val[, params]): ";
509 RCP<crs_graph_type> graph;
514 catch (std::exception& e) {
515 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
516 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
517 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
518 "RCP<ParameterList>]) threw an exception: " << e.what ());
524 staticGraph_ = graph;
537 auto lclGraph = staticGraph_->getLocalGraphDevice ();
538 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
539 (
size_t (lclGraph.row_map.extent (0)) !=
size_t (ptr.size ()) ||
540 size_t (lclGraph.entries.extent (0)) !=
size_t (ind.size ()),
541 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
542 "ptr, ind[, params]) did not set the local graph correctly. "
543 "Please report this bug to the Tpetra developers.");
546 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
548 valuesUnpacked_wdv = valuesPacked_wdv;
558 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
561 const Teuchos::RCP<const map_type>& colMap,
563 const Teuchos::RCP<Teuchos::ParameterList>& params) :
565 storageStatus_ (Details::STORAGE_1D_PACKED),
568 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
569 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
570 const char suffix[] =
571 " Please report this bug to the Tpetra developers.";
573 Teuchos::RCP<crs_graph_type> graph;
576 lclMatrix.graph, params));
578 catch (std::exception& e) {
579 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
580 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
581 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
582 "exception: " << e.what ());
584 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
585 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
586 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
587 "did not produce a fill-complete graph. Please report this bug to the "
588 "Tpetra developers.");
593 staticGraph_ = graph;
596 valuesUnpacked_wdv = valuesPacked_wdv;
598 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
600 "At the end of a CrsMatrix constructor that should produce "
601 "a fillComplete matrix, isFillActive() is true." << suffix);
602 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
604 "CrsMatrix constructor that should produce a fillComplete "
605 "matrix, isFillComplete() is false." << suffix);
609 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
612 const Teuchos::RCP<const map_type>& rowMap,
613 const Teuchos::RCP<const map_type>& colMap,
614 const Teuchos::RCP<const map_type>& domainMap,
615 const Teuchos::RCP<const map_type>& rangeMap,
616 const Teuchos::RCP<Teuchos::ParameterList>& params) :
618 storageStatus_ (Details::STORAGE_1D_PACKED),
621 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
622 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
623 "local_matrix_device_type[, RCP<ParameterList>]): ";
624 const char suffix[] =
625 " Please report this bug to the Tpetra developers.";
627 Teuchos::RCP<crs_graph_type> graph;
629 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
630 domainMap, rangeMap, params));
632 catch (std::exception& e) {
633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
634 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
635 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
636 "RCP<ParameterList>]) threw an exception: " << e.what ());
638 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
639 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
640 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
641 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
642 "not produce a fillComplete graph." << suffix);
647 staticGraph_ = graph;
650 valuesUnpacked_wdv = valuesPacked_wdv;
652 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
654 "At the end of a CrsMatrix constructor that should produce "
655 "a fillComplete matrix, isFillActive() is true." << suffix);
656 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
658 "CrsMatrix constructor that should produce a fillComplete "
659 "matrix, isFillComplete() is false." << suffix);
663 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
666 const Teuchos::RCP<const map_type>& rowMap,
667 const Teuchos::RCP<const map_type>& colMap,
668 const Teuchos::RCP<const map_type>& domainMap,
669 const Teuchos::RCP<const map_type>& rangeMap,
670 const Teuchos::RCP<const import_type>& importer,
671 const Teuchos::RCP<const export_type>& exporter,
672 const Teuchos::RCP<Teuchos::ParameterList>& params) :
674 storageStatus_ (Details::STORAGE_1D_PACKED),
678 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
679 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
680 const char suffix[] =
681 " Please report this bug to the Tpetra developers.";
683 Teuchos::RCP<crs_graph_type> graph;
686 domainMap, rangeMap, importer,
689 catch (std::exception& e) {
690 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
691 (
true, std::runtime_error,
"CrsGraph constructor "
692 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
693 "params) threw: " << e.what ());
695 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
696 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
697 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
698 "Export, params) did not produce a fill-complete graph. "
699 "Please report this bug to the Tpetra developers.");
704 staticGraph_ = graph;
707 valuesUnpacked_wdv = valuesPacked_wdv;
709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
711 "At the end of a CrsMatrix constructor that should produce "
712 "a fillComplete matrix, isFillActive() is true." << suffix);
713 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
715 "CrsMatrix constructor that should produce a fillComplete "
716 "matrix, isFillComplete() is false." << suffix);
720 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
723 const Teuchos::DataAccess copyOrView):
725 staticGraph_ (source.getCrsGraph()),
726 storageStatus_ (source.storageStatus_)
728 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
729 "const CrsMatrix&, const Teuchos::DataAccess): ";
730 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
732 "Source graph must be fillComplete().");
734 if (copyOrView == Teuchos::Copy) {
735 using values_type =
typename local_matrix_device_type::values_type;
737 using Kokkos::view_alloc;
738 using Kokkos::WithoutInitializing;
739 values_type newvals (view_alloc (
"val", WithoutInitializing),
744 valuesUnpacked_wdv = valuesPacked_wdv;
747 else if (copyOrView == Teuchos::View) {
753 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
754 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
755 "has an invalid value " << copyOrView <<
". Valid values "
756 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
757 "Teuchos::View = " << Teuchos::View <<
".");
762 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
767 std::swap(crs_matrix.
importMV_, this->importMV_);
768 std::swap(crs_matrix.
exportMV_, this->exportMV_);
769 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
770 std::swap(crs_matrix.myGraph_, this->myGraph_);
771 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
772 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
775 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
778 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
779 Teuchos::RCP<const Teuchos::Comm<int> >
782 return getCrsGraphRef ().getComm ();
785 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
789 return fillComplete_;
792 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
796 return ! fillComplete_;
799 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
803 return this->getCrsGraphRef ().isStorageOptimized ();
806 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
810 return getCrsGraphRef ().isLocallyIndexed ();
813 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
817 return getCrsGraphRef ().isGloballyIndexed ();
820 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
824 return getCrsGraphRef ().hasColMap ();
827 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
831 return getCrsGraphRef ().getGlobalNumEntries ();
834 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
838 return getCrsGraphRef ().getLocalNumEntries ();
841 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
845 return getCrsGraphRef ().getGlobalNumRows ();
848 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
852 return getCrsGraphRef ().getGlobalNumCols ();
855 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
859 return getCrsGraphRef ().getLocalNumRows ();
863 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
867 return getCrsGraphRef ().getLocalNumCols ();
871 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
875 return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
878 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
882 return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
885 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
889 return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
892 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
896 return getCrsGraphRef ().getLocalMaxNumRowEntries ();
899 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
903 return getRowMap ()->getIndexBase ();
906 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
907 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
910 return getCrsGraphRef ().getRowMap ();
913 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
914 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
917 return getCrsGraphRef ().getColMap ();
920 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
921 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
924 return getCrsGraphRef ().getDomainMap ();
927 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
928 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
931 return getCrsGraphRef ().getRangeMap ();
934 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
935 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
938 if (staticGraph_ != Teuchos::null) {
944 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
945 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
948 if (staticGraph_ != Teuchos::null) {
954 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
959 #ifdef HAVE_TPETRA_DEBUG
960 constexpr
bool debug =
true;
962 constexpr
bool debug =
false;
963 #endif // HAVE_TPETRA_DEBUG
965 if (! this->staticGraph_.is_null ()) {
966 return * (this->staticGraph_);
970 const char tfecfFuncName[] =
"getCrsGraphRef: ";
971 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
972 (this->myGraph_.is_null (), std::logic_error,
973 "Both staticGraph_ and myGraph_ are null. "
974 "Please report this bug to the Tpetra developers.");
976 return * (this->myGraph_);
980 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
981 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
985 auto numCols = staticGraph_->getColMap()->getLocalNumElements();
988 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
989 staticGraph_->getLocalGraphDevice());
992 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
993 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
997 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
998 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
999 valuesPacked_wdv.getHostView(Access::ReadWrite),
1000 staticGraph_->getLocalGraphHost());
1003 #if KOKKOSKERNELS_VERSION < 40299
1005 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1006 std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1010 auto localMatrix = getLocalMatrixDevice();
1011 #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL)
1012 if(this->getLocalNumEntries() <=
size_t(Teuchos::OrdinalTraits<LocalOrdinal>::max()))
1014 if(this->ordinalRowptrs.data() ==
nullptr)
1016 auto originalRowptrs = localMatrix.graph.row_map;
1019 this->ordinalRowptrs = ordinal_rowptrs_type(
1020 Kokkos::ViewAllocateWithoutInitializing(
"CrsMatrix::ordinalRowptrs"), originalRowptrs.extent(0));
1021 auto ordinalRowptrs_ = this->ordinalRowptrs;
1022 Kokkos::parallel_for(
"CrsMatrix::getLocalMultiplyOperator::convertRowptrs",
1023 Kokkos::RangePolicy<execution_space>(0, originalRowptrs.extent(0)),
1024 KOKKOS_LAMBDA(LocalOrdinal i)
1026 ordinalRowptrs_(i) = originalRowptrs(i);
1030 return std::make_shared<local_multiply_op_type>(
1031 std::make_shared<local_matrix_device_type>(localMatrix), this->ordinalRowptrs);
1035 return std::make_shared<local_multiply_op_type>(
1036 std::make_shared<local_matrix_device_type>(localMatrix));
1040 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1044 return myGraph_.is_null ();
1047 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1054 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1061 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1070 const char tfecfFuncName[] =
"allocateValues: ";
1071 const char suffix[] =
1072 " Please report this bug to the Tpetra developers.";
1073 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
1075 std::unique_ptr<std::string> prefix;
1077 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1078 std::ostringstream os;
1079 os << *prefix <<
"lg: "
1080 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1082 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1083 <<
"Allocated" << endl;
1084 std::cerr << os.str();
1087 const bool debug = Behavior::debug(
"CrsMatrix");
1089 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1090 (this->staticGraph_.is_null (), std::logic_error,
1091 "staticGraph_ is null." << suffix);
1096 if ((gas == GraphAlreadyAllocated) !=
1097 staticGraph_->indicesAreAllocated ()) {
1098 const char err1[] =
"The caller has asserted that the graph "
1100 const char err2[] =
"already allocated, but the static graph "
1101 "says that its indices are ";
1102 const char err3[] =
"already allocated. ";
1103 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1104 (gas == GraphAlreadyAllocated &&
1105 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1106 err1 << err2 <<
"not " << err3 << suffix);
1107 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1108 (gas != GraphAlreadyAllocated &&
1109 staticGraph_->indicesAreAllocated (), std::logic_error,
1110 err1 <<
"not " << err2 << err3 << suffix);
1118 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1119 (! this->staticGraph_->indicesAreAllocated () &&
1120 this->myGraph_.is_null (), std::logic_error,
1121 "The static graph says that its indices are not allocated, "
1122 "but the graph is not owned by the matrix." << suffix);
1125 if (gas == GraphNotYetAllocated) {
1127 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1128 (this->myGraph_.is_null (), std::logic_error,
1129 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1132 this->myGraph_->allocateIndices (lg, verbose);
1134 catch (std::exception& e) {
1135 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1136 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1137 "threw an exception: " << e.what ());
1140 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1141 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1142 "threw an exception not a subclass of std::exception.");
1147 const size_t lclTotalNumEntries = this->staticGraph_->getLocalAllocationSize();
1149 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
1150 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1151 (this->staticGraph_->getRowPtrsUnpackedHost()(lclNumRows) != lclTotalNumEntries, std::logic_error,
1152 "length of staticGraph's lclIndsUnpacked does not match final entry of rowPtrsUnapcked_host." << suffix);
1156 using values_type =
typename local_matrix_device_type::values_type;
1158 std::ostringstream os;
1159 os << *prefix <<
"Allocate values_wdv: Pre "
1160 << valuesUnpacked_wdv.extent(0) <<
", post "
1161 << lclTotalNumEntries << endl;
1162 std::cerr << os.str();
1166 values_type(
"Tpetra::CrsMatrix::values",
1167 lclTotalNumEntries));
1171 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1177 using ::Tpetra::Details::getEntryOnHost;
1178 using Teuchos::arcp_const_cast;
1179 using Teuchos::Array;
1180 using Teuchos::ArrayRCP;
1181 using Teuchos::null;
1185 using row_map_type =
typename local_graph_device_type::row_map_type;
1186 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1187 using values_type =
typename local_matrix_device_type::values_type;
1189 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
1191 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1192 "fillComplete or expertStaticFillComplete): ";
1193 const char suffix[] =
1194 " Please report this bug to the Tpetra developers.";
1198 std::unique_ptr<std::string> prefix;
1200 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1201 std::ostringstream os;
1202 os << *prefix << endl;
1203 std::cerr << os.str ();
1209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1210 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1211 "(myGraph_) is null. This means that the matrix has a "
1212 "const (a.k.a. \"static\") graph. fillComplete or "
1213 "expertStaticFillComplete should never call "
1214 "fillLocalGraphAndMatrix in that case." << suffix);
1217 const size_t lclNumRows = this->getLocalNumRows ();
1232 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1233 myGraph_->rowPtrsUnpacked_dev_;
1236 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1237 (curRowOffsets.extent (0) == 0, std::logic_error,
1238 "curRowOffsets.extent(0) == 0.");
1239 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1240 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1241 "curRowOffsets.extent(0) = "
1242 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1243 << (lclNumRows + 1) <<
".");
1244 const size_t numOffsets = curRowOffsets.extent (0);
1245 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1246 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1248 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1249 std::logic_error,
"numOffsets = " <<
1250 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1251 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1252 << numOffsets <<
") = " << valToCheck <<
".");
1255 if (myGraph_->getLocalNumEntries() !=
1256 myGraph_->getLocalAllocationSize()) {
1260 typename row_map_type::non_const_type k_ptrs;
1261 row_map_type k_ptrs_const;
1262 lclinds_1d_type k_inds;
1266 std::ostringstream os;
1267 const auto numEnt = myGraph_->getLocalNumEntries();
1268 const auto allocSize = myGraph_->getLocalAllocationSize();
1269 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1270 <<
", allocSize=" << allocSize << endl;
1271 std::cerr << os.str ();
1279 if (debug && curRowOffsets.extent (0) != 0) {
1280 const size_t numOffsets =
1281 static_cast<size_t> (curRowOffsets.extent (0));
1282 const auto valToCheck = myGraph_->getRowPtrsUnpackedHost()(numOffsets - 1);
1283 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1284 (static_cast<size_t> (valToCheck) !=
1285 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1286 std::logic_error,
"(unpacked branch) Before "
1287 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1288 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1289 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1290 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1291 (static_cast<size_t> (valToCheck) !=
1292 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1293 std::logic_error,
"(unpacked branch) Before "
1294 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1295 <<
") = " << valToCheck
1296 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1297 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
1305 size_t lclTotalNumEntries = 0;
1311 std::ostringstream os;
1312 os << *prefix <<
"Allocate packed row offsets: "
1313 << (lclNumRows+1) << endl;
1314 std::cerr << os.str ();
1316 typename row_map_type::non_const_type
1317 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1318 typename row_entries_type::const_type numRowEnt_h =
1319 myGraph_->k_numRowEntries_;
1322 lclTotalNumEntries =
1326 k_ptrs = packedRowOffsets;
1327 k_ptrs_const = k_ptrs;
1331 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1332 (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1334 "(unpacked branch) After packing k_ptrs, "
1335 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1336 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1337 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1338 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1339 (valToCheck != lclTotalNumEntries, std::logic_error,
1340 "(unpacked branch) After filling k_ptrs, "
1341 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1342 <<
" != total number of entries on the calling process = "
1343 << lclTotalNumEntries <<
".");
1348 std::ostringstream os;
1349 os << *prefix <<
"Allocate packed local column indices: "
1350 << lclTotalNumEntries << endl;
1351 std::cerr << os.str ();
1353 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1355 std::ostringstream os;
1356 os << *prefix <<
"Allocate packed values: "
1357 << lclTotalNumEntries << endl;
1358 std::cerr << os.str ();
1360 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
1372 using inds_packer_type = pack_functor<
1373 typename Graph::local_graph_device_type::entries_type::non_const_type,
1374 typename Graph::local_inds_dualv_type::t_dev::const_type,
1375 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1376 typename Graph::local_graph_device_type::row_map_type>;
1377 inds_packer_type indsPacker (
1379 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1380 k_ptrs, curRowOffsets);
1382 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1383 Kokkos::parallel_for
1384 (
"Tpetra::CrsMatrix pack column indices",
1385 range_type (0, lclNumRows), indsPacker);
1389 using vals_packer_type = pack_functor<
1390 typename values_type::non_const_type,
1391 typename values_type::const_type,
1392 typename row_map_type::non_const_type,
1393 typename row_map_type::const_type>;
1394 vals_packer_type valsPacker (
1396 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1397 k_ptrs, curRowOffsets);
1398 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1399 range_type (0, lclNumRows), valsPacker);
1402 const char myPrefix[] =
"(\"Optimize Storage\""
1403 "=true branch) After packing, ";
1404 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1405 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1406 <<
"k_ptrs.extent(0) = 0. This probably means that "
1407 "rowPtrsUnpacked_ was never allocated.");
1408 if (k_ptrs.extent (0) != 0) {
1409 const size_t numOffsets (k_ptrs.extent (0));
1410 const auto valToCheck =
1411 getEntryOnHost (k_ptrs, numOffsets - 1);
1412 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1413 (
size_t (valToCheck) != k_vals.extent (0),
1414 std::logic_error, myPrefix <<
1415 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1416 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1417 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1418 (
size_t (valToCheck) != k_inds.extent (0),
1419 std::logic_error, myPrefix <<
1420 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1421 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
1425 myGraph_->setRowPtrsPacked(k_ptrs_const);
1426 myGraph_->lclIndsPacked_wdv =
1433 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1434 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1435 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1436 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1437 valuesPacked_wdv = valuesUnpacked_wdv;
1440 std::ostringstream os;
1441 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1442 << myGraph_->getRowPtrsUnpackedHost().extent(0) <<
", lclIndsUnpacked_wdv: "
1443 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1444 << valuesUnpacked_wdv.extent(0) << endl;
1445 std::cerr << os.str();
1449 const char myPrefix[] =
1450 "(\"Optimize Storage\"=false branch) ";
1451 auto rowPtrsUnpackedHost = myGraph_->getRowPtrsUnpackedHost();
1452 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1453 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1454 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1455 "that rowPtrsUnpacked_ was never allocated.");
1456 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1457 const size_t numOffsets = rowPtrsUnpackedHost.extent (0);
1458 const auto valToCheck = rowPtrsUnpackedHost(numOffsets - 1);
1459 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1460 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1461 std::logic_error, myPrefix <<
1462 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1463 <<
" != valuesPacked_wdv.extent(0) = "
1464 << valuesPacked_wdv.extent (0) <<
".");
1465 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1466 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1467 std::logic_error, myPrefix <<
1468 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1469 <<
" != myGraph_->lclIndsPacked.extent(0) = "
1470 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
1476 const char myPrefix[] =
"After packing, ";
1477 auto rowPtrsPackedHost = myGraph_->getRowPtrsPackedHost();
1478 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1479 (
size_t (rowPtrsPackedHost.extent (0)) !=
size_t (lclNumRows + 1),
1480 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1481 << rowPtrsPackedHost.extent (0) <<
" != lclNumRows+1 = " <<
1482 (lclNumRows+1) <<
".");
1483 if (rowPtrsPackedHost.extent (0) != 0) {
1484 const size_t numOffsets (rowPtrsPackedHost.extent (0));
1485 const size_t valToCheck = rowPtrsPackedHost(numOffsets-1);
1486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1487 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1488 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1489 (numOffsets-1) <<
") = " << valToCheck
1490 <<
" != valuesPacked_wdv.extent(0) = "
1491 << valuesPacked_wdv.extent (0) <<
".");
1492 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1493 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1494 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1495 (numOffsets-1) <<
") = " << valToCheck
1496 <<
" != myGraph_->lclIndsPacked_wdvk_inds.extent(0) = "
1497 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
1505 const bool defaultOptStorage =
1506 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1507 const bool requestOptimizedStorage =
1508 (! params.is_null () &&
1509 params->get (
"Optimize Storage", defaultOptStorage)) ||
1510 (params.is_null () && defaultOptStorage);
1515 if (requestOptimizedStorage) {
1520 std::ostringstream os;
1521 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1522 << myGraph_->k_numRowEntries_.extent(0) << endl;
1523 std::cerr << os.str();
1526 myGraph_->k_numRowEntries_ = row_entries_type ();
1531 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1532 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1533 myGraph_->packedUnpackedRowPtrsMatch_ =
true;
1534 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1535 valuesUnpacked_wdv = valuesPacked_wdv;
1537 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1538 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1542 std::ostringstream os;
1543 os << *prefix <<
"User requested NOT to optimize storage"
1545 std::cerr << os.str();
1550 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1555 using ::Tpetra::Details::ProfilingRegion;
1556 using Teuchos::ArrayRCP;
1557 using Teuchos::Array;
1558 using Teuchos::null;
1562 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1563 using non_const_row_map_type =
typename row_map_type::non_const_type;
1564 using values_type =
typename local_matrix_device_type::values_type;
1565 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1566 const size_t lclNumRows = getLocalNumRows();
1569 std::unique_ptr<std::string> prefix;
1571 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1572 std::ostringstream os;
1573 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1574 std::cerr << os.str ();
1586 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1587 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1588 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1590 row_map_type k_ptrs;
1596 bool requestOptimizedStorage =
true;
1597 const bool default_OptimizeStorage =
1598 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1599 if (! params.is_null() &&
1600 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1601 requestOptimizedStorage =
false;
1608 if (! staticGraph_->isStorageOptimized () &&
1609 requestOptimizedStorage) {
1611 (
true, std::runtime_error,
"You requested optimized storage "
1612 "by setting the \"Optimize Storage\" flag to \"true\" in "
1613 "the ParameterList, or by virtue of default behavior. "
1614 "However, the associated CrsGraph was filled separately and "
1615 "requested not to optimize storage. Therefore, the "
1616 "CrsMatrix cannot optimize storage.");
1617 requestOptimizedStorage =
false;
1642 if (nodeNumEntries != nodeNumAllocated) {
1644 std::ostringstream os;
1645 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1646 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1648 std::cerr << os.str();
1653 std::ostringstream os;
1654 os << *prefix <<
"Allocate packed row offsets: "
1655 << (lclNumRows+1) << endl;
1656 std::cerr << os.str();
1658 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
1663 size_t lclTotalNumEntries = 0;
1666 typename row_entries_type::const_type numRowEnt_h =
1667 staticGraph_->k_numRowEntries_;
1669 lclTotalNumEntries =
1676 std::ostringstream os;
1677 os << *prefix <<
"Allocate packed values: "
1678 << lclTotalNumEntries << endl;
1679 std::cerr << os.str ();
1681 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
1685 typename values_type::non_const_type,
1686 typename values_type::const_type,
1687 typename row_map_type::non_const_type,
1688 typename row_map_type::const_type> valsPacker
1689 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1690 tmpk_ptrs, k_rowPtrs);
1693 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1694 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1695 range_type (0, lclNumRows), valsPacker);
1699 valuesPacked_wdv = valuesUnpacked_wdv;
1701 std::ostringstream os;
1702 os << *prefix <<
"Storage already packed: "
1703 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1704 std::cerr << os.str();
1709 if (requestOptimizedStorage) {
1712 valuesUnpacked_wdv = valuesPacked_wdv;
1714 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1718 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1723 const typename crs_graph_type::SLocalGlobalViews& newInds,
1724 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1725 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1726 const ELocalGlobal lg,
1727 const ELocalGlobal I)
1729 const size_t oldNumEnt = rowInfo.numEntries;
1730 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
1736 if (numInserted > 0) {
1737 const size_t startOffset = oldNumEnt;
1738 memcpy ((
void*) &oldRowVals[startOffset], &newRowVals[0],
1739 numInserted *
sizeof (impl_scalar_type));
1743 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1747 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1748 const Teuchos::ArrayView<const Scalar>& values,
1752 const char tfecfFuncName[] =
"insertLocalValues: ";
1754 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1755 (! this->isFillActive (), std::runtime_error,
1756 "Fill is not active. After calling fillComplete, you must call "
1757 "resumeFill before you may insert entries into the matrix again.");
1758 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1759 (this->isStaticGraph (), std::runtime_error,
1760 "Cannot insert indices with static graph; use replaceLocalValues() "
1764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1765 (graph.
colMap_.is_null (), std::runtime_error,
1766 "Cannot insert local indices without a column map.");
1767 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1769 std::runtime_error,
"Graph indices are global; use "
1770 "insertGlobalValues().");
1771 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1772 (values.size () != indices.size (), std::runtime_error,
1773 "values.size() = " << values.size ()
1774 <<
" != indices.size() = " << indices.size () <<
".");
1775 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1776 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1777 "Local row index " << lclRow <<
" does not belong to this process.");
1779 if (! graph.indicesAreAllocated ()) {
1783 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
1786 #ifdef HAVE_TPETRA_DEBUG
1787 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1792 using Teuchos::toString;
1795 Teuchos::Array<LocalOrdinal> badColInds;
1796 bool allInColMap =
true;
1797 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1799 allInColMap =
false;
1800 badColInds.push_back (indices[k]);
1803 if (! allInColMap) {
1804 std::ostringstream os;
1805 os <<
"You attempted to insert entries in owned row " << lclRow
1806 <<
", at the following column indices: " << toString (indices)
1808 os <<
"Of those, the following indices are not in the column Map on "
1809 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1810 "the matrix has a column Map already, it is invalid to insert "
1811 "entries at those locations.";
1812 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1813 (
true, std::invalid_argument, os.str ());
1816 #endif // HAVE_TPETRA_DEBUG
1820 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1822 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1823 valsView[offset] += values[k]; };
1824 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1825 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1826 }
else if (CM ==
INSERT) {
1827 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1828 valsView[offset] = values[k]; };
1829 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1830 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1832 std::ostringstream os;
1833 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1834 <<
"but this has not been implemented." << endl;
1835 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1836 (
true, std::invalid_argument, os.str ());
1840 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1844 const LocalOrdinal numEnt,
1845 const Scalar vals[],
1846 const LocalOrdinal cols[],
1849 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1850 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1851 this->insertLocalValues (localRow, colsT, valsT, CM);
1854 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1859 const GlobalOrdinal gblColInds[],
1861 const size_t numInputEnt)
1863 #ifdef HAVE_TPETRA_DEBUG
1864 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
1866 const size_t curNumEnt = rowInfo.numEntries;
1867 #endif // HAVE_TPETRA_DEBUG
1869 if (! graph.indicesAreAllocated ()) {
1872 using ::Tpetra::Details::Behavior;
1873 const bool verbose = Behavior::verbose(
"CrsMatrix");
1874 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
1879 rowInfo = graph.
getRowInfo (rowInfo.localRow);
1882 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1883 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1884 valsView[offset] += vals[k];
1886 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1887 #ifdef HAVE_TPETRA_DEBUG
1893 #ifdef HAVE_TPETRA_DEBUG
1894 size_t newNumEnt = curNumEnt + numInserted;
1895 const size_t chkNewNumEnt =
1897 if (chkNewNumEnt != newNumEnt) {
1898 std::ostringstream os;
1899 os << std::endl <<
"newNumEnt = " << newNumEnt
1900 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1901 <<
") = " << chkNewNumEnt <<
"." << std::endl
1902 <<
"\torigNumEnt: " << origNumEnt << std::endl
1903 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1904 <<
"\tgblColInds: [";
1905 for (
size_t k = 0; k < numInputEnt; ++k) {
1906 os << gblColInds[k];
1907 if (k +
size_t (1) < numInputEnt) {
1911 os <<
"]" << std::endl
1913 for (
size_t k = 0; k < numInputEnt; ++k) {
1915 if (k +
size_t (1) < numInputEnt) {
1919 os <<
"]" << std::endl;
1921 if (this->supportsRowViews ()) {
1922 values_host_view_type vals2;
1923 if (this->isGloballyIndexed ()) {
1924 global_inds_host_view_type gblColInds2;
1925 const GlobalOrdinal gblRow =
1926 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1928 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1929 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
1933 bool getViewThrew =
false;
1935 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1937 catch (std::exception& e) {
1938 getViewThrew =
true;
1939 os <<
"getGlobalRowView threw exception:" << std::endl
1940 << e.what () << std::endl;
1942 if (! getViewThrew) {
1943 os <<
"\tNew global column indices: ";
1944 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1945 os << gblColInds2[jjj] <<
" ";
1947 os <<
"\tNew values: ";
1948 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1949 os << vals2[jjj] <<
" ";
1954 else if (this->isLocallyIndexed ()) {
1955 local_inds_host_view_type lclColInds2;
1956 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1957 os <<
"\tNew local column indices: ";
1958 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1959 os << lclColInds2[jjj] <<
" ";
1961 os <<
"\tNew values: ";
1962 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1963 os << vals2[jjj] <<
" ";
1968 os <<
"Please report this bug to the Tpetra developers.";
1969 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1970 (
true, std::logic_error, os.str ());
1972 #endif // HAVE_TPETRA_DEBUG
1975 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1979 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1980 const Teuchos::ArrayView<const Scalar>& values)
1982 using Teuchos::toString;
1985 typedef LocalOrdinal LO;
1986 typedef GlobalOrdinal GO;
1987 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
1988 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
1989 const char tfecfFuncName[] =
"insertGlobalValues: ";
1991 #ifdef HAVE_TPETRA_DEBUG
1992 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1993 (values.size () != indices.size (), std::runtime_error,
1994 "values.size() = " << values.size () <<
" != indices.size() = "
1995 << indices.size () <<
".");
1996 #endif // HAVE_TPETRA_DEBUG
2000 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2003 if (lclRow == OTLO::invalid ()) {
2010 this->insertNonownedGlobalValues (gblRow, indices, values);
2013 if (this->isStaticGraph ()) {
2015 const int myRank = rowMap.getComm ()->getRank ();
2016 const int numProcs = rowMap.getComm ()->getSize ();
2017 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2018 (
true, std::runtime_error,
2019 "The matrix was constructed with a constant (\"static\") graph, "
2020 "yet the given global row index " << gblRow <<
" is in the row "
2021 "Map on the calling process (with rank " << myRank <<
", of " <<
2022 numProcs <<
" process(es)). In this case, you may not insert "
2023 "new entries into rows owned by the calling process.");
2027 const IST*
const inputVals =
2028 reinterpret_cast<const IST*
> (values.getRawPtr ());
2029 const GO*
const inputGblColInds = indices.getRawPtr ();
2030 const size_t numInputEnt = indices.size ();
2039 if (! graph.
colMap_.is_null ()) {
2045 #ifdef HAVE_TPETRA_DEBUG
2046 Teuchos::Array<GO> badColInds;
2047 #endif // HAVE_TPETRA_DEBUG
2048 const size_type numEntriesToInsert = indices.size ();
2049 bool allInColMap =
true;
2050 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2052 allInColMap =
false;
2053 #ifdef HAVE_TPETRA_DEBUG
2054 badColInds.push_back (indices[k]);
2057 #endif // HAVE_TPETRA_DEBUG
2060 if (! allInColMap) {
2061 std::ostringstream os;
2062 os <<
"You attempted to insert entries in owned row " << gblRow
2063 <<
", at the following column indices: " << toString (indices)
2065 #ifdef HAVE_TPETRA_DEBUG
2066 os <<
"Of those, the following indices are not in the column Map "
2067 "on this process: " << toString (badColInds) <<
"." << endl
2068 <<
"Since the matrix has a column Map already, it is invalid "
2069 "to insert entries at those locations.";
2071 os <<
"At least one of those indices is not in the column Map "
2072 "on this process." << endl <<
"It is invalid to insert into "
2073 "columns not in the column Map on the process that owns the "
2075 #endif // HAVE_TPETRA_DEBUG
2076 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2077 (
true, std::invalid_argument, os.str ());
2081 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2082 inputVals, numInputEnt);
2087 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2091 const LocalOrdinal numEnt,
2092 const Scalar vals[],
2093 const GlobalOrdinal inds[])
2095 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2096 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2097 this->insertGlobalValues (globalRow, indsT, valsT);
2101 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2105 const GlobalOrdinal gblRow,
2106 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2107 const Teuchos::ArrayView<const Scalar>& values,
2110 typedef impl_scalar_type IST;
2111 typedef LocalOrdinal LO;
2112 typedef GlobalOrdinal GO;
2113 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2114 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
2117 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2118 (values.size () != indices.size (), std::runtime_error,
2119 "values.size() = " << values.size () <<
" != indices.size() = "
2120 << indices.size () <<
".");
2125 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2126 const LO lclRow = rowMap.getLocalElement (gblRow);
2127 if (lclRow == OTLO::invalid ()) {
2134 this->insertNonownedGlobalValues (gblRow, indices, values);
2137 if (this->isStaticGraph ()) {
2139 const int myRank = rowMap.getComm ()->getRank ();
2140 const int numProcs = rowMap.getComm ()->getSize ();
2141 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2142 (
true, std::runtime_error,
2143 "The matrix was constructed with a constant (\"static\") graph, "
2144 "yet the given global row index " << gblRow <<
" is in the row "
2145 "Map on the calling process (with rank " << myRank <<
", of " <<
2146 numProcs <<
" process(es)). In this case, you may not insert "
2147 "new entries into rows owned by the calling process.");
2150 crs_graph_type& graph = * (this->myGraph_);
2151 const IST*
const inputVals =
2152 reinterpret_cast<const IST*
> (values.getRawPtr ());
2153 const GO*
const inputGblColInds = indices.getRawPtr ();
2154 const size_t numInputEnt = indices.size ();
2155 RowInfo rowInfo = graph.getRowInfo (lclRow);
2157 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2164 const map_type& colMap = * (graph.colMap_);
2165 size_t curOffset = 0;
2166 while (curOffset < numInputEnt) {
2170 Teuchos::Array<LO> lclIndices;
2171 size_t endOffset = curOffset;
2172 for ( ; endOffset < numInputEnt; ++endOffset) {
2173 auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2174 if (lclIndex != OTLO::invalid())
2175 lclIndices.push_back(lclIndex);
2182 const LO numIndInSeq = (endOffset - curOffset);
2183 if (numIndInSeq != 0) {
2184 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
2190 const bool invariant = endOffset == numInputEnt ||
2191 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2192 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2193 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2195 curOffset = endOffset + 1;
2198 else if (! graph.colMap_.is_null ()) {
2199 const map_type& colMap = * (graph.colMap_);
2200 size_t curOffset = 0;
2201 while (curOffset < numInputEnt) {
2205 size_t endOffset = curOffset;
2206 for ( ; endOffset < numInputEnt &&
2207 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2213 const LO numIndInSeq = (endOffset - curOffset);
2214 if (numIndInSeq != 0) {
2215 rowInfo = graph.getRowInfo(lclRow);
2216 this->insertGlobalValuesImpl (graph, rowInfo,
2217 inputGblColInds + curOffset,
2218 inputVals + curOffset,
2225 const bool invariant = endOffset == numInputEnt ||
2226 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2227 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2228 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2230 curOffset = endOffset + 1;
2234 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2235 inputVals, numInputEnt);
2240 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2242 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2243 insertGlobalValuesFilteredChecked(
2244 const GlobalOrdinal gblRow,
2245 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2246 const Teuchos::ArrayView<const Scalar>& values,
2247 const char*
const prefix,
2255 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2257 catch(std::exception& e) {
2258 std::ostringstream os;
2260 const size_t maxNumToPrint =
2262 os << *prefix <<
": insertGlobalValuesFiltered threw an "
2263 "exception: " << e.what() << endl
2264 <<
"Global row index: " << gblRow << endl;
2272 os <<
": insertGlobalValuesFiltered threw an exception: "
2275 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
2279 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2285 const LocalOrdinal inds[],
2287 const LocalOrdinal numElts)
2289 typedef LocalOrdinal LO;
2290 typedef GlobalOrdinal GO;
2291 const bool sorted = graph.
isSorted ();
2301 for (LO j = 0; j < numElts; ++j) {
2302 const LO lclColInd = inds[j];
2303 const size_t offset =
2304 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2305 lclColInd, hint, sorted);
2306 if (offset != rowInfo.numEntries) {
2307 rowVals[offset] = newVals[j];
2314 if (graph.
colMap_.is_null ()) {
2315 return Teuchos::OrdinalTraits<LO>::invalid ();
2323 for (LO j = 0; j < numElts; ++j) {
2325 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2326 const size_t offset =
2327 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2328 gblColInd, hint, sorted);
2329 if (offset != rowInfo.numEntries) {
2330 rowVals[offset] = newVals[j];
2349 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2353 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2354 const Teuchos::ArrayView<const Scalar>& vals)
2356 typedef LocalOrdinal LO;
2358 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
2359 if (static_cast<LO> (vals.size ()) != numInputEnt) {
2360 return Teuchos::OrdinalTraits<LO>::invalid ();
2362 const LO*
const inputInds = lclCols.getRawPtr ();
2363 const Scalar*
const inputVals = vals.getRawPtr ();
2364 return this->replaceLocalValues (localRow, numInputEnt,
2365 inputVals, inputInds);
2368 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2374 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2375 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2378 const LO numInputEnt = inputInds.extent(0);
2379 if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2380 return Teuchos::OrdinalTraits<LO>::invalid();
2382 const Scalar*
const inVals =
2383 reinterpret_cast<const Scalar*
>(inputVals.data());
2384 return this->replaceLocalValues(localRow, numInputEnt,
2385 inVals, inputInds.data());
2388 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2392 const LocalOrdinal numEnt,
2393 const Scalar inputVals[],
2394 const LocalOrdinal inputCols[])
2397 typedef LocalOrdinal LO;
2399 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2401 return Teuchos::OrdinalTraits<LO>::invalid ();
2406 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2409 return static_cast<LO
> (0);
2411 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2412 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2413 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2414 inputCols, inVals, numEnt);
2417 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2423 const GlobalOrdinal inds[],
2425 const LocalOrdinal numElts)
2427 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
2429 [&](
size_t const k,
size_t const ,
size_t const offset) {
2430 rowVals[offset] = newVals[k];
2432 std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2436 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2440 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2441 const Teuchos::ArrayView<const Scalar>& inputVals)
2443 typedef LocalOrdinal LO;
2445 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2446 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2447 return Teuchos::OrdinalTraits<LO>::invalid ();
2449 return this->replaceGlobalValues (globalRow, numInputEnt,
2450 inputVals.getRawPtr (),
2451 inputGblColInds.getRawPtr ());
2454 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2458 const LocalOrdinal numEnt,
2459 const Scalar inputVals[],
2460 const GlobalOrdinal inputGblColInds[])
2463 typedef LocalOrdinal LO;
2465 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2467 return Teuchos::OrdinalTraits<LO>::invalid ();
2472 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2475 return static_cast<LO
> (0);
2478 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2479 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2480 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2481 inputGblColInds, inVals, numEnt);
2484 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2490 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2491 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2500 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
2501 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2502 return Teuchos::OrdinalTraits<LO>::invalid();
2504 const Scalar*
const inVals =
2505 reinterpret_cast<const Scalar*
>(inputVals.data());
2506 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
2510 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2516 const GlobalOrdinal inds[],
2518 const LocalOrdinal numElts,
2521 typedef LocalOrdinal LO;
2522 typedef GlobalOrdinal GO;
2524 const bool sorted = graph.
isSorted ();
2533 if (graph.
colMap_.is_null ()) {
2544 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2546 for (LO j = 0; j < numElts; ++j) {
2548 if (lclColInd != LINV) {
2549 const size_t offset =
2550 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2551 lclColInd, hint, sorted);
2552 if (offset != rowInfo.numEntries) {
2554 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2557 rowVals[offset] += newVals[j];
2570 for (LO j = 0; j < numElts; ++j) {
2571 const GO gblColInd = inds[j];
2572 const size_t offset =
2573 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2574 gblColInd, hint, sorted);
2575 if (offset != rowInfo.numEntries) {
2577 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2580 rowVals[offset] += newVals[j];
2594 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2598 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2599 const Teuchos::ArrayView<const Scalar>& inputVals,
2602 typedef LocalOrdinal LO;
2604 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2605 if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2606 return Teuchos::OrdinalTraits<LO>::invalid ();
2608 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2609 inputVals.getRawPtr (),
2610 inputGblColInds.getRawPtr (),
2614 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2618 const LocalOrdinal numInputEnt,
2619 const Scalar inputVals[],
2620 const GlobalOrdinal inputGblColInds[],
2624 typedef LocalOrdinal LO;
2625 typedef GlobalOrdinal GO;
2627 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2629 return Teuchos::OrdinalTraits<LO>::invalid ();
2634 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2639 using Teuchos::ArrayView;
2640 ArrayView<const GO> inputGblColInds_av(
2641 numInputEnt == 0 ?
nullptr : inputGblColInds,
2643 ArrayView<const Scalar> inputVals_av(
2644 numInputEnt == 0 ?
nullptr :
2645 inputVals, numInputEnt);
2650 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2661 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2662 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2663 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2664 inputGblColInds, inVals,
2665 numInputEnt, atomic);
2669 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2673 const LocalOrdinal numInputEnt,
2674 const impl_scalar_type inputVals[],
2675 const LocalOrdinal inputCols[],
2676 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2679 using Tpetra::Details::OrdinalTraits;
2680 typedef LocalOrdinal LO;
2682 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2684 return Teuchos::OrdinalTraits<LO>::invalid ();
2686 const crs_graph_type& graph = * (this->staticGraph_);
2687 const RowInfo rowInfo = graph.getRowInfo (lclRow);
2689 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2692 return static_cast<LO
> (0);
2694 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2695 return this->transformLocalValues (curRowVals.data (), graph,
2696 rowInfo, inputCols, inputVals,
2697 numInputEnt, f, atomic);
2700 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2702 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2703 transformGlobalValues (
const GlobalOrdinal gblRow,
2704 const LocalOrdinal numInputEnt,
2705 const impl_scalar_type inputVals[],
2706 const GlobalOrdinal inputCols[],
2707 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2710 using Tpetra::Details::OrdinalTraits;
2711 typedef LocalOrdinal LO;
2713 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2715 return OrdinalTraits<LO>::invalid ();
2717 const crs_graph_type& graph = * (this->staticGraph_);
2718 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2720 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2723 return static_cast<LO
> (0);
2725 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2726 return this->transformGlobalValues (curRowVals.data (), graph,
2727 rowInfo, inputCols, inputVals,
2728 numInputEnt, f, atomic);
2731 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2733 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2734 transformLocalValues (impl_scalar_type rowVals[],
2735 const crs_graph_type& graph,
2736 const RowInfo& rowInfo,
2737 const LocalOrdinal inds[],
2738 const impl_scalar_type newVals[],
2739 const LocalOrdinal numElts,
2740 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2743 typedef impl_scalar_type ST;
2744 typedef LocalOrdinal LO;
2745 typedef GlobalOrdinal GO;
2752 const bool sorted = graph.isSorted ();
2757 if (graph.isLocallyIndexed ()) {
2760 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2762 for (LO j = 0; j < numElts; ++j) {
2763 const LO lclColInd = inds[j];
2764 const size_t offset =
2765 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2766 lclColInd, hint, sorted);
2767 if (offset != rowInfo.numEntries) {
2776 volatile ST*
const dest = &rowVals[offset];
2777 (void) atomic_binary_function_update (dest, newVals[j], f);
2781 rowVals[offset] = f (rowVals[offset], newVals[j]);
2788 else if (graph.isGloballyIndexed ()) {
2792 if (graph.colMap_.is_null ()) {
2799 const map_type& colMap = * (graph.colMap_);
2802 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2804 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2805 for (LO j = 0; j < numElts; ++j) {
2806 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2807 if (gblColInd != GINV) {
2808 const size_t offset =
2809 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2810 gblColInd, hint, sorted);
2811 if (offset != rowInfo.numEntries) {
2820 volatile ST*
const dest = &rowVals[offset];
2821 (void) atomic_binary_function_update (dest, newVals[j], f);
2825 rowVals[offset] = f (rowVals[offset], newVals[j]);
2840 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2842 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2843 transformGlobalValues (impl_scalar_type rowVals[],
2844 const crs_graph_type& graph,
2845 const RowInfo& rowInfo,
2846 const GlobalOrdinal inds[],
2847 const impl_scalar_type newVals[],
2848 const LocalOrdinal numElts,
2849 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2852 typedef impl_scalar_type ST;
2853 typedef LocalOrdinal LO;
2854 typedef GlobalOrdinal GO;
2861 const bool sorted = graph.isSorted ();
2866 if (graph.isGloballyIndexed ()) {
2869 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2871 for (LO j = 0; j < numElts; ++j) {
2872 const GO gblColInd = inds[j];
2873 const size_t offset =
2874 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2875 gblColInd, hint, sorted);
2876 if (offset != rowInfo.numEntries) {
2885 volatile ST*
const dest = &rowVals[offset];
2886 (void) atomic_binary_function_update (dest, newVals[j], f);
2890 rowVals[offset] = f (rowVals[offset], newVals[j]);
2897 else if (graph.isLocallyIndexed ()) {
2901 if (graph.colMap_.is_null ()) {
2907 const map_type& colMap = * (graph.colMap_);
2910 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2912 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2913 for (LO j = 0; j < numElts; ++j) {
2914 const LO lclColInd = colMap.getLocalElement (inds[j]);
2915 if (lclColInd != LINV) {
2916 const size_t offset =
2917 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2918 lclColInd, hint, sorted);
2919 if (offset != rowInfo.numEntries) {
2928 volatile ST*
const dest = &rowVals[offset];
2929 (void) atomic_binary_function_update (dest, newVals[j], f);
2933 rowVals[offset] = f (rowVals[offset], newVals[j]);
2948 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2954 const LocalOrdinal inds[],
2956 const LocalOrdinal numElts,
2959 typedef LocalOrdinal LO;
2960 typedef GlobalOrdinal GO;
2962 const bool sorted = graph.
isSorted ();
2972 for (LO j = 0; j < numElts; ++j) {
2973 const LO lclColInd = inds[j];
2974 const size_t offset =
2975 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2976 lclColInd, hint, sorted);
2977 if (offset != rowInfo.numEntries) {
2979 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2982 rowVals[offset] += newVals[j];
2990 if (graph.
colMap_.is_null ()) {
2991 return Teuchos::OrdinalTraits<LO>::invalid ();
2999 for (LO j = 0; j < numElts; ++j) {
3001 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3002 const size_t offset =
3003 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3004 gblColInd, hint, sorted);
3005 if (offset != rowInfo.numEntries) {
3007 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3010 rowVals[offset] += newVals[j];
3030 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3034 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3035 const Teuchos::ArrayView<const Scalar>& values,
3039 const LO numInputEnt =
static_cast<LO
>(indices.size());
3040 if (static_cast<LO>(values.size()) != numInputEnt) {
3041 return Teuchos::OrdinalTraits<LO>::invalid();
3043 const LO*
const inputInds = indices.getRawPtr();
3044 const scalar_type*
const inputVals = values.getRawPtr();
3045 return this->sumIntoLocalValues(localRow, numInputEnt,
3046 inputVals, inputInds, atomic);
3049 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3055 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3056 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3060 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
3061 if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3062 return Teuchos::OrdinalTraits<LO>::invalid();
3065 reinterpret_cast<const scalar_type*
>(inputVals.data());
3066 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3067 inputInds.data(), atomic);
3070 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3074 const LocalOrdinal numEnt,
3075 const Scalar vals[],
3076 const LocalOrdinal cols[],
3080 typedef LocalOrdinal LO;
3082 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3084 return Teuchos::OrdinalTraits<LO>::invalid ();
3089 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3092 return static_cast<LO
> (0);
3094 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
3095 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3096 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3097 cols, inputVals, numEnt, atomic);
3100 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3102 values_dualv_type::t_host::const_type
3106 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3107 return typename values_dualv_type::t_host::const_type ();
3109 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
3114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3116 values_dualv_type::t_host
3120 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3121 return typename values_dualv_type::t_host ();
3123 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
3128 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3130 values_dualv_type::t_dev::const_type
3134 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3135 return typename values_dualv_type::t_dev::const_type ();
3137 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
3142 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3144 values_dualv_type::t_dev
3148 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3149 return typename values_dualv_type::t_dev ();
3151 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
3157 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3161 nonconst_local_inds_host_view_type &indices,
3162 nonconst_values_host_view_type &values,
3163 size_t& numEntries)
const
3165 using Teuchos::ArrayView;
3166 using Teuchos::av_reinterpret_cast;
3167 const char tfecfFuncName[] =
"getLocalRowCopy: ";
3169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3170 (! this->hasColMap (), std::runtime_error,
3171 "The matrix does not have a column Map yet. This means we don't have "
3172 "local indices for columns yet, so it doesn't make sense to call this "
3173 "method. If the matrix doesn't have a column Map yet, you should call "
3174 "fillComplete on it first.");
3176 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3177 const size_t theNumEntries = rowinfo.numEntries;
3178 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3179 (static_cast<size_t> (indices.size ()) < theNumEntries ||
3180 static_cast<size_t> (values.size ()) < theNumEntries,
3181 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3182 theNumEntries <<
" entry/ies, but indices.size() = " <<
3183 indices.size () <<
" and values.size() = " << values.size () <<
".");
3184 numEntries = theNumEntries;
3186 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3187 if (staticGraph_->isLocallyIndexed ()) {
3188 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3189 auto curVals = getValuesViewHost(rowinfo);
3191 for (
size_t j = 0; j < theNumEntries; ++j) {
3192 values[j] = curVals[j];
3193 indices[j] = curLclInds(j);
3196 else if (staticGraph_->isGloballyIndexed ()) {
3198 const map_type& colMap = * (staticGraph_->colMap_);
3199 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3200 auto curVals = getValuesViewHost(rowinfo);
3202 for (
size_t j = 0; j < theNumEntries; ++j) {
3203 values[j] = curVals[j];
3211 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3215 nonconst_global_inds_host_view_type &indices,
3216 nonconst_values_host_view_type &values,
3217 size_t& numEntries)
const
3219 using Teuchos::ArrayView;
3220 using Teuchos::av_reinterpret_cast;
3221 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
3224 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3225 const size_t theNumEntries = rowinfo.numEntries;
3226 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3227 static_cast<size_t> (indices.size ()) < theNumEntries ||
3228 static_cast<size_t> (values.size ()) < theNumEntries,
3229 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3230 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3231 indices.size () <<
" and values.size() = " << values.size () <<
".");
3232 numEntries = theNumEntries;
3234 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3235 if (staticGraph_->isLocallyIndexed ()) {
3236 const map_type& colMap = * (staticGraph_->colMap_);
3237 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3238 auto curVals = getValuesViewHost(rowinfo);
3240 for (
size_t j = 0; j < theNumEntries; ++j) {
3241 values[j] = curVals[j];
3245 else if (staticGraph_->isGloballyIndexed ()) {
3246 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3247 auto curVals = getValuesViewHost(rowinfo);
3249 for (
size_t j = 0; j < theNumEntries; ++j) {
3250 values[j] = curVals[j];
3251 indices[j] = curGblInds(j);
3258 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3262 local_inds_host_view_type &indices,
3263 values_host_view_type &values)
const
3265 const char tfecfFuncName[] =
"getLocalRowView: ";
3267 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3268 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3269 "its indices as global indices, so you cannot get a view with local "
3270 "column indices. If the matrix has a column Map, you may call "
3271 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3272 "a view with global column indices by calling getGlobalRowCopy().");
3274 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
3275 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3276 rowInfo.numEntries > 0) {
3277 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3281 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3288 indices = local_inds_host_view_type();
3289 values = values_host_view_type();
3292 #ifdef HAVE_TPETRA_DEBUG
3293 const char suffix[] =
". This should never happen. Please report this "
3294 "bug to the Tpetra developers.";
3295 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3296 (static_cast<size_t> (indices.size ()) !=
3297 static_cast<size_t> (values.size ()), std::logic_error,
3298 "At the end of this method, for local row " << localRow <<
", "
3299 "indices.size() = " << indices.size () <<
" != values.size () = "
3300 << values.size () << suffix);
3301 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3302 (static_cast<size_t> (indices.size ()) !=
3303 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3304 "At the end of this method, for local row " << localRow <<
", "
3305 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3306 << rowInfo.numEntries << suffix);
3307 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3309 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3310 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3311 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3312 expectedNumEntries << suffix);
3313 #endif // HAVE_TPETRA_DEBUG
3317 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3321 global_inds_host_view_type &indices,
3322 values_host_view_type &values)
const
3324 const char tfecfFuncName[] =
"getGlobalRowView: ";
3326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3327 isLocallyIndexed (), std::runtime_error,
3328 "The matrix is locally indexed, so we cannot return a view of the row "
3329 "with global column indices. Use getGlobalRowCopy() instead.");
3334 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3335 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3336 rowInfo.numEntries > 0) {
3337 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3340 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3345 indices = global_inds_host_view_type();
3346 values = values_host_view_type();
3349 #ifdef HAVE_TPETRA_DEBUG
3350 const char suffix[] =
". This should never happen. Please report this "
3351 "bug to the Tpetra developers.";
3352 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3353 (static_cast<size_t> (indices.size ()) !=
3354 static_cast<size_t> (values.size ()), std::logic_error,
3355 "At the end of this method, for global row " << globalRow <<
", "
3356 "indices.size() = " << indices.size () <<
" != values.size () = "
3357 << values.size () << suffix);
3358 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3359 (static_cast<size_t> (indices.size ()) !=
3360 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3361 "At the end of this method, for global row " << globalRow <<
", "
3362 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3363 << rowInfo.numEntries << suffix);
3364 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3365 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3366 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3367 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3368 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3369 " " << expectedNumEntries << suffix);
3370 #endif // HAVE_TPETRA_DEBUG
3374 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3381 const size_t nlrs = staticGraph_->getLocalNumRows ();
3382 const size_t numEntries = staticGraph_->getLocalNumEntries ();
3383 if (! staticGraph_->indicesAreAllocated () ||
3384 nlrs == 0 || numEntries == 0) {
3389 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3390 KokkosBlas::scal(vals, theAlpha, vals);
3395 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3406 const size_t numEntries = staticGraph_->getLocalNumEntries();
3407 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3415 Kokkos::fence(
"CrsMatrix::setAllToScalar");
3419 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3422 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3423 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3424 const typename local_matrix_device_type::values_type& values)
3427 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3428 const char tfecfFuncName[] =
"setAllValues: ";
3429 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3430 (columnIndices.size () != values.size (), std::invalid_argument,
3431 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3432 " = " << values.size () <<
".");
3433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3434 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
3437 myGraph_->setAllIndices (rowPointers, columnIndices);
3439 catch (std::exception &e) {
3440 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3441 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3442 "exception: " << e.what ());
3449 auto lclGraph = myGraph_->getLocalGraphDevice ();
3450 const size_t numEnt = lclGraph.entries.extent (0);
3451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3452 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3453 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3454 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3455 "local graph. Please report this bug to the Tpetra developers.");
3458 valuesUnpacked_wdv = valuesPacked_wdv;
3462 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3464 checkInternalState ();
3467 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3473 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3475 auto graph = localDeviceMatrix.graph;
3478 auto rows = graph.row_map;
3479 auto columns = graph.entries;
3480 auto values = localDeviceMatrix.values;
3482 setAllValues(rows,columns,values);
3485 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3489 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3490 const Teuchos::ArrayRCP<Scalar>& val)
3492 using Kokkos::Compat::getKokkosViewDeepCopy;
3493 using Teuchos::ArrayRCP;
3494 using Teuchos::av_reinterpret_cast;
3497 typedef typename local_graph_device_type::row_map_type row_map_type;
3499 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3505 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3506 Kokkos::View<
const size_t*,
3507 typename row_map_type::array_layout,
3509 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3512 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3513 (ptrNative.extent (0) != ptrSizeT.extent (0),
3514 std::logic_error,
"ptrNative.extent(0) = " <<
3515 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3516 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3517 "Tpetra developers.");
3519 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3520 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3521 this->setAllValues (ptrNative, indIn, valIn);
3524 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3529 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3530 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3531 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
3538 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
3539 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3540 offsets.resize (lclNumRows);
3546 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3551 Kokkos::MemoryUnmanaged> output_type;
3552 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3553 staticGraph_->getLocalDiagOffsets (offsetsOut);
3556 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3557 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3558 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3559 Kokkos::MemoryUnmanaged> output_type;
3560 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3566 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3571 using Teuchos::ArrayRCP;
3572 using Teuchos::ArrayView;
3573 using Teuchos::av_reinterpret_cast;
3574 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3579 staticGraph_.is_null (), std::runtime_error,
3580 "This method requires that the matrix have a graph.");
3581 auto rowMapPtr = this->getRowMap ();
3582 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3588 auto colMapPtr = this->getColMap ();
3589 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3590 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3591 "This method requires that the matrix have a column Map.");
3592 const map_type& rowMap = * rowMapPtr;
3593 const map_type& colMap = * colMapPtr;
3594 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3596 #ifdef HAVE_TPETRA_DEBUG
3599 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3600 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3601 "The input Vector's Map must be compatible with the CrsMatrix's row "
3602 "Map. You may check this by using Map's isCompatible method: "
3603 "diag.getMap ()->isCompatible (A.getRowMap ());");
3604 #endif // HAVE_TPETRA_DEBUG
3606 if (this->isFillComplete ()) {
3609 const auto D_lcl_1d =
3610 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3612 const auto lclRowMap = rowMap.getLocalMap ();
3617 getLocalMatrixDevice ());
3625 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3630 Kokkos::MemoryUnmanaged>& offsets)
const
3632 typedef LocalOrdinal LO;
3634 #ifdef HAVE_TPETRA_DEBUG
3635 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3636 const map_type& rowMap = * (this->getRowMap ());
3639 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3640 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3641 "The input Vector's Map must be compatible with (in the sense of Map::"
3642 "isCompatible) the CrsMatrix's row Map.");
3643 #endif // HAVE_TPETRA_DEBUG
3653 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3656 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3658 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3659 getLocalMatrixDevice ());
3662 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3666 const Teuchos::ArrayView<const size_t>& offsets)
const
3668 using LO = LocalOrdinal;
3669 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
3672 #ifdef HAVE_TPETRA_DEBUG
3673 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3674 const map_type& rowMap = * (this->getRowMap ());
3677 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3678 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3679 "The input Vector's Map must be compatible with (in the sense of Map::"
3680 "isCompatible) the CrsMatrix's row Map.");
3681 #endif // HAVE_TPETRA_DEBUG
3693 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
3695 using host_offsets_view_type =
3696 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3697 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3698 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3700 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3701 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3702 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3704 auto rowPtrsPackedHost = staticGraph_->getRowPtrsPackedHost();
3705 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
3706 Kokkos::parallel_for
3707 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3708 range_type (0, myNumRows),
3709 [&, INV, h_offsets] (
const LO lclRow) {
3710 lclVecHost1d(lclRow) = STS::zero ();
3711 if (h_offsets[lclRow] != INV) {
3712 auto curRowOffset = rowPtrsPackedHost (lclRow);
3713 lclVecHost1d(lclRow) =
3714 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
3721 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3726 using ::Tpetra::Details::ProfilingRegion;
3727 using Teuchos::ArrayRCP;
3728 using Teuchos::ArrayView;
3729 using Teuchos::null;
3732 using Teuchos::rcpFromRef;
3734 const char tfecfFuncName[] =
"leftScale: ";
3736 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
3738 RCP<const vec_type> xp;
3739 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3742 auto exporter = this->getCrsGraphRef ().getExporter ();
3743 if (exporter.get () !=
nullptr) {
3744 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3745 tempVec->doImport (x, *exporter,
REPLACE);
3749 xp = rcpFromRef (x);
3752 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3753 xp = rcpFromRef (x);
3756 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3757 (
true, std::invalid_argument,
"x's Map must be the same as "
3758 "either the row Map or the range Map of the CrsMatrix.");
3761 if (this->isFillComplete()) {
3762 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3763 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3766 x_lcl_1d,
false,
false);
3770 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3771 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
3776 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3781 using ::Tpetra::Details::ProfilingRegion;
3782 using Teuchos::ArrayRCP;
3783 using Teuchos::ArrayView;
3784 using Teuchos::null;
3787 using Teuchos::rcpFromRef;
3789 const char tfecfFuncName[] =
"rightScale: ";
3791 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
3793 RCP<const vec_type> xp;
3794 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
3797 auto importer = this->getCrsGraphRef ().getImporter ();
3798 if (importer.get () !=
nullptr) {
3799 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3800 tempVec->doImport (x, *importer,
REPLACE);
3804 xp = rcpFromRef (x);
3807 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
3808 xp = rcpFromRef (x);
3810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3811 (
true, std::runtime_error,
"x's Map must be the same as "
3812 "either the domain Map or the column Map of the CrsMatrix.");
3815 if (this->isFillComplete()) {
3816 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3817 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3820 x_lcl_1d,
false,
false);
3824 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3825 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
3830 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3835 using Teuchos::ArrayView;
3836 using Teuchos::outArg;
3837 using Teuchos::REDUCE_SUM;
3838 using Teuchos::reduceAll;
3846 if (getLocalNumEntries() > 0) {
3847 if (isStorageOptimized ()) {
3850 const size_t numEntries = getLocalNumEntries ();
3851 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3852 for (
size_t k = 0; k < numEntries; ++k) {
3853 auto val = values[k];
3857 const mag_type val_abs = STS::abs (val);
3858 mySum += val_abs * val_abs;
3862 const LocalOrdinal numRows =
3863 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3864 for (LocalOrdinal r = 0; r < numRows; ++r) {
3865 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3866 const size_t numEntries = rowInfo.numEntries;
3867 auto A_r = this->getValuesViewHost(rowInfo);
3868 for (
size_t k = 0; k < numEntries; ++k) {
3870 const mag_type val_abs = STS::abs (val);
3871 mySum += val_abs * val_abs;
3877 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3878 mySum, outArg (totalSum));
3879 return STM::sqrt (totalSum);
3882 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3887 const char tfecfFuncName[] =
"replaceColMap: ";
3891 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3892 myGraph_.is_null (), std::runtime_error,
3893 "This method does not work if the matrix has a const graph. The whole "
3894 "idea of a const graph is that you are not allowed to change it, but "
3895 "this method necessarily must modify the graph, since the graph owns "
3896 "the matrix's column Map.");
3897 myGraph_->replaceColMap (newColMap);
3900 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3904 const Teuchos::RCP<const map_type>& newColMap,
3905 const Teuchos::RCP<const import_type>& newImport,
3906 const bool sortEachRow)
3908 const char tfecfFuncName[] =
"reindexColumns: ";
3909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3910 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3911 "The input graph is null, but the matrix does not own its graph.");
3913 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
3914 const bool sortGraph =
false;
3918 if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3919 const LocalOrdinal lclNumRows =
3920 static_cast<LocalOrdinal
> (theGraph.getLocalNumRows ());
3922 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3924 const RowInfo rowInfo = theGraph.getRowInfo (row);
3925 auto lclColInds = theGraph.getLocalIndsViewHostNonConst (rowInfo);
3926 auto vals = this->getValuesViewHostNonConst (rowInfo);
3928 sort2 (lclColInds.data (),
3929 lclColInds.data () + rowInfo.numEntries,
3932 theGraph.indicesAreSorted_ =
true;
3936 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3941 const char tfecfFuncName[] =
"replaceDomainMap: ";
3942 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3943 myGraph_.is_null (), std::runtime_error,
3944 "This method does not work if the matrix has a const graph. The whole "
3945 "idea of a const graph is that you are not allowed to change it, but this"
3946 " method necessarily must modify the graph, since the graph owns the "
3947 "matrix's domain Map and Import objects.");
3948 myGraph_->replaceDomainMap (newDomainMap);
3951 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3955 Teuchos::RCP<const import_type>& newImporter)
3957 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3958 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3959 myGraph_.is_null (), std::runtime_error,
3960 "This method does not work if the matrix has a const graph. The whole "
3961 "idea of a const graph is that you are not allowed to change it, but this"
3962 " method necessarily must modify the graph, since the graph owns the "
3963 "matrix's domain Map and Import objects.");
3964 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
3967 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3972 const char tfecfFuncName[] =
"replaceRangeMap: ";
3973 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3974 myGraph_.is_null (), std::runtime_error,
3975 "This method does not work if the matrix has a const graph. The whole "
3976 "idea of a const graph is that you are not allowed to change it, but this"
3977 " method necessarily must modify the graph, since the graph owns the "
3978 "matrix's domain Map and Import objects.");
3979 myGraph_->replaceRangeMap (newRangeMap);
3982 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3986 Teuchos::RCP<const export_type>& newExporter)
3988 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
3989 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3990 myGraph_.is_null (), std::runtime_error,
3991 "This method does not work if the matrix has a const graph. The whole "
3992 "idea of a const graph is that you are not allowed to change it, but this"
3993 " method necessarily must modify the graph, since the graph owns the "
3994 "matrix's domain Map and Import objects.");
3995 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
3998 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4002 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4003 const Teuchos::ArrayView<const Scalar>& values)
4005 using Teuchos::Array;
4006 typedef GlobalOrdinal GO;
4007 typedef typename Array<GO>::size_type size_type;
4009 const size_type numToInsert = indices.size ();
4012 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4013 Array<GO>& curRowInds = curRow.first;
4014 Array<Scalar>& curRowVals = curRow.second;
4015 const size_type newCapacity = curRowInds.size () + numToInsert;
4016 curRowInds.reserve (newCapacity);
4017 curRowVals.reserve (newCapacity);
4018 for (size_type k = 0; k < numToInsert; ++k) {
4019 curRowInds.push_back (indices[k]);
4020 curRowVals.push_back (values[k]);
4024 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4031 using Teuchos::Comm;
4032 using Teuchos::outArg;
4035 using Teuchos::REDUCE_MAX;
4036 using Teuchos::REDUCE_MIN;
4037 using Teuchos::reduceAll;
4041 typedef GlobalOrdinal GO;
4042 typedef typename Teuchos::Array<GO>::size_type size_type;
4043 const char tfecfFuncName[] =
"globalAssemble: ";
4044 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
4046 const bool verbose = Behavior::verbose(
"CrsMatrix");
4047 std::unique_ptr<std::string> prefix;
4049 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4050 std::ostringstream os;
4051 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4053 std::cerr << os.str();
4055 RCP<const Comm<int> > comm = getComm ();
4057 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4058 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4059 "you may call this method.");
4061 const size_t myNumNonlocalRows = nonlocals_.size ();
4068 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4069 int someoneHasNonlocalRows = 0;
4070 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4071 outArg (someoneHasNonlocalRows));
4072 if (someoneHasNonlocalRows == 0) {
4086 RCP<const map_type> nonlocalRowMap;
4087 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4089 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4090 size_type curPos = 0;
4091 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4092 ++mapIter, ++curPos) {
4093 myNonlocalGblRows[curPos] = mapIter->first;
4096 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4097 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4104 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4105 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4106 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4107 merge2 (gblCols_newEnd, vals_newEnd,
4108 gblCols.begin (), gblCols.end (),
4109 vals.begin (), vals.end ());
4110 gblCols.erase (gblCols_newEnd, gblCols.end ());
4111 vals.erase (vals_newEnd, vals.end ());
4112 numEntPerNonlocalRow[curPos] = gblCols.size ();
4123 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4125 auto iter = std::min_element (myNonlocalGblRows.begin (),
4126 myNonlocalGblRows.end ());
4127 if (iter != myNonlocalGblRows.end ()) {
4128 myMinNonlocalGblRow = *iter;
4131 GO gblMinNonlocalGblRow = 0;
4132 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4133 outArg (gblMinNonlocalGblRow));
4134 const GO indexBase = gblMinNonlocalGblRow;
4135 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4136 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4145 std::ostringstream os;
4146 os << *prefix <<
"Create nonlocal matrix" << endl;
4147 std::cerr << os.str();
4149 RCP<crs_matrix_type> nonlocalMatrix =
4150 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4152 size_type curPos = 0;
4153 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4154 ++mapIter, ++curPos) {
4155 const GO gblRow = mapIter->first;
4157 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4158 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4160 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4172 auto origRowMap = this->getRowMap ();
4173 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4175 int isLocallyComplete = 1;
4177 if (origRowMapIsOneToOne) {
4179 std::ostringstream os;
4180 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4181 std::cerr << os.str();
4183 export_type exportToOrig (nonlocalRowMap, origRowMap);
4185 isLocallyComplete = 0;
4188 std::ostringstream os;
4189 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4190 std::cerr << os.str();
4192 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4197 std::ostringstream os;
4198 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4199 std::cerr << os.str();
4206 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4208 isLocallyComplete = 0;
4216 std::ostringstream os;
4217 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4219 std::cerr << os.str();
4221 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4223 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4229 std::ostringstream os;
4230 os << *prefix <<
"Free nonlocalMatrix" << endl;
4231 std::cerr << os.str();
4233 nonlocalMatrix = Teuchos::null;
4237 std::ostringstream os;
4238 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4239 std::cerr << os.str();
4241 import_type importToOrig (oneToOneRowMap, origRowMap);
4242 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4250 std::ostringstream os;
4251 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4252 std::cerr << os.str();
4254 decltype (nonlocals_) newNonlocals;
4255 std::swap (nonlocals_, newNonlocals);
4264 int isGloballyComplete = 0;
4265 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4266 outArg (isGloballyComplete));
4267 TEUCHOS_TEST_FOR_EXCEPTION
4268 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4269 "you called insertGlobalValues with a global row index which is not in "
4270 "the matrix's row Map on any process in its communicator.");
4273 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4278 if (! isStaticGraph ()) {
4279 myGraph_->resumeFill (params);
4281 #if KOKKOSKERNELS_VERSION >= 40299
4283 applyHelper.reset();
4285 fillComplete_ =
false;
4288 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4292 return getCrsGraphRef ().haveGlobalConstants ();
4295 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4300 const char tfecfFuncName[] =
"fillComplete(params): ";
4302 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4303 (this->getCrsGraph ().is_null (), std::logic_error,
4304 "getCrsGraph() returns null. This should not happen at this point. "
4305 "Please report this bug to the Tpetra developers.");
4315 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4316 Teuchos::RCP<const map_type> domainMap = rangeMap;
4317 this->fillComplete (domainMap, rangeMap, params);
4321 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4325 const Teuchos::RCP<const map_type>& rangeMap,
4326 const Teuchos::RCP<Teuchos::ParameterList>& params)
4330 using Teuchos::ArrayRCP;
4334 const char tfecfFuncName[] =
"fillComplete: ";
4335 ProfilingRegion regionFillComplete
4336 (
"Tpetra::CrsMatrix::fillComplete");
4337 const bool verbose = Behavior::verbose(
"CrsMatrix");
4338 std::unique_ptr<std::string> prefix;
4340 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4341 std::ostringstream os;
4342 os << *prefix << endl;
4343 std::cerr << os.str ();
4346 "Tpetra::CrsMatrix::fillCompete",
4349 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4350 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4351 "Matrix fill state must be active (isFillActive() "
4352 "must be true) before you may call fillComplete().");
4353 const int numProcs = this->getComm ()->getSize ();
4363 bool assertNoNonlocalInserts =
false;
4366 bool sortGhosts =
true;
4368 if (! params.is_null ()) {
4369 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4370 assertNoNonlocalInserts);
4371 if (params->isParameter (
"sort column map ghost gids")) {
4372 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4374 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4375 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
4380 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4382 if (! this->myGraph_.is_null ()) {
4383 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4386 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4387 if (this->hasColMap ()) {
4388 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4391 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4396 if (needGlobalAssemble) {
4397 this->globalAssemble ();
4400 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4401 (numProcs == 1 && nonlocals_.size() > 0,
4402 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4403 "An invalid entry (i.e., with row index not in the row Map) must have "
4404 "been submitted to the CrsMatrix.");
4407 if (this->isStaticGraph ()) {
4415 #ifdef HAVE_TPETRA_DEBUG
4433 const bool domainMapsMatch =
4434 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4435 const bool rangeMapsMatch =
4436 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4438 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4439 (! domainMapsMatch, std::runtime_error,
4440 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4441 "The graph cannot be changed because it was given to the CrsMatrix "
4442 "constructor as const. You can fix this by passing in the graph's "
4443 "domain Map and range Map to the matrix's fillComplete call.");
4445 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4446 (! rangeMapsMatch, std::runtime_error,
4447 "The CrsMatrix's range Map does not match the graph's range Map. "
4448 "The graph cannot be changed because it was given to the CrsMatrix "
4449 "constructor as const. You can fix this by passing in the graph's "
4450 "domain Map and range Map to the matrix's fillComplete call.");
4451 #endif // HAVE_TPETRA_DEBUG
4455 this->fillLocalMatrix (params);
4463 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4466 Teuchos::Array<int> remotePIDs (0);
4467 const bool mustBuildColMap = ! this->hasColMap ();
4468 if (mustBuildColMap) {
4469 this->myGraph_->makeColMap (remotePIDs);
4474 const std::pair<size_t, std::string> makeIndicesLocalResult =
4475 this->myGraph_->makeIndicesLocal(verbose);
4480 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4481 (makeIndicesLocalResult.first != 0, std::runtime_error,
4482 makeIndicesLocalResult.second);
4484 const bool sorted = this->myGraph_->isSorted ();
4485 const bool merged = this->myGraph_->isMerged ();
4486 this->sortAndMergeIndicesAndValues (sorted, merged);
4491 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4495 this->fillLocalGraphAndMatrix (params);
4497 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4498 params->get (
"compute global constants",
true);
4499 if (callGraphComputeGlobalConstants) {
4500 this->myGraph_->computeGlobalConstants ();
4503 this->myGraph_->computeLocalConstants ();
4505 this->myGraph_->fillComplete_ =
true;
4506 this->myGraph_->checkInternalState ();
4511 this->fillComplete_ =
true;
4514 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4516 this->checkInternalState ();
4520 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4524 const Teuchos::RCP<const map_type> & rangeMap,
4525 const Teuchos::RCP<const import_type>& importer,
4526 const Teuchos::RCP<const export_type>& exporter,
4527 const Teuchos::RCP<Teuchos::ParameterList> ¶ms)
4529 #ifdef HAVE_TPETRA_MMM_TIMINGS
4531 if(!params.is_null())
4532 label = params->get(
"Timer Label",label);
4533 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4534 using Teuchos::TimeMonitor;
4536 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4539 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4540 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4541 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4542 "must be true) before calling fillComplete().");
4543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4544 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4547 #ifdef HAVE_TPETRA_MMM_TIMINGS
4548 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
4551 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4555 #ifdef HAVE_TPETRA_MMM_TIMINGS
4556 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4559 fillLocalGraphAndMatrix (params);
4564 fillComplete_ =
true;
4567 #ifdef HAVE_TPETRA_DEBUG
4568 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4569 ": We're at the end of fillComplete(), but isFillActive() is true. "
4570 "Please report this bug to the Tpetra developers.");
4571 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4572 ": We're at the end of fillComplete(), but isFillActive() is true. "
4573 "Please report this bug to the Tpetra developers.");
4574 #endif // HAVE_TPETRA_DEBUG
4576 #ifdef HAVE_TPETRA_MMM_TIMINGS
4577 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4580 checkInternalState();
4584 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4590 LocalOrdinal* beg = cols;
4591 LocalOrdinal* end = cols + rowLen;
4592 LocalOrdinal* newend = beg;
4594 LocalOrdinal* cur = beg + 1;
4598 while (cur != end) {
4599 if (*cur != *newend) {
4616 return newend - beg;
4619 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4624 using ::Tpetra::Details::ProfilingRegion;
4625 typedef LocalOrdinal LO;
4626 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4627 host_execution_space;
4628 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4629 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4630 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4632 if (! sorted || ! merged) {
4633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4634 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4635 "\"static\" (const) graph, since the matrix does not own the graph.");
4636 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4637 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4638 "this matrix claims ! isStaticGraph(). "
4639 "Please report this bug to the Tpetra developers.");
4640 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4641 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4642 "this method if the graph's storage has already been optimized. "
4643 "Please report this bug to the Tpetra developers.");
4646 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4647 size_t totalNumDups = 0;
4652 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
4654 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4655 [=] (
const LO lclRow,
size_t& numDups) {
4656 size_t rowBegin = rowBegins_(lclRow);
4657 size_t rowLen = rowLengths_(lclRow);
4658 LO* cols = cols_.data() + rowBegin;
4661 sort2 (cols, cols + rowLen, vals);
4664 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4665 rowLengths_(lclRow) = newRowLength;
4666 numDups += rowLen - newRowLength;
4679 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4690 using Teuchos::rcp_const_cast;
4691 using Teuchos::rcpFromRef;
4692 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4693 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4699 if (alpha == ZERO) {
4702 }
else if (beta != ONE) {
4716 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4717 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4723 const bool Y_is_overwritten = (beta ==
ZERO);
4726 const bool Y_is_replicated =
4727 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4735 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4742 RCP<const MV> X_colMap;
4743 if (importer.is_null ()) {
4751 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4753 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4758 X_colMap = rcpFromRef (X_in);
4762 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4768 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4771 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4772 X_colMap = rcp_const_cast<
const MV> (X_colMapNonConst);
4779 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
4786 if (! exporter.is_null ()) {
4787 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4789 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4795 if (Y_is_overwritten) {
4821 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4828 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4832 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4840 if (Y_is_replicated) {
4841 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
4846 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4851 const Teuchos::ETransp mode,
4856 using Teuchos::null;
4859 using Teuchos::rcp_const_cast;
4860 using Teuchos::rcpFromRef;
4861 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4864 if (alpha == ZERO) {
4877 else if (beta == ZERO) {
4895 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4896 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4901 const bool Y_is_replicated = (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4902 const bool Y_is_overwritten = (beta ==
ZERO);
4903 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4909 X = rcp (
new MV (X_in, Teuchos::Copy));
4911 X = rcpFromRef (X_in);
4915 if (importer != Teuchos::null) {
4916 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4919 if (importMV_ == null) {
4920 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
4923 if (exporter != Teuchos::null) {
4924 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4927 if (exportMV_ == null) {
4928 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
4934 if (! exporter.is_null ()) {
4935 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4936 exportMV_->doImport (X_in, *exporter,
INSERT);
4943 if (importer != Teuchos::null) {
4944 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
4951 importMV_->putScalar (ZERO);
4953 this->localApply (*X, *importMV_, mode, alpha, ZERO);
4955 if (Y_is_overwritten) {
4972 MV Y (Y_in, Teuchos::Copy);
4973 this->localApply (*X, Y, mode, alpha, beta);
4976 this->localApply (*X, Y_in, mode, alpha, beta);
4983 if (Y_is_replicated) {
4984 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
4989 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4994 const Teuchos::ETransp mode,
4995 const Scalar& alpha,
4996 const Scalar& beta)
const
4999 using Teuchos::NO_TRANS;
5000 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5007 const char tfecfFuncName[] =
"localApply: ";
5008 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5012 const bool transpose = (mode != Teuchos::NO_TRANS);
5013 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5015 getColMap ()->getLocalNumElements (), std::runtime_error,
5016 "NO_TRANS case: X has the wrong number of local rows. "
5018 "getColMap()->getLocalNumElements() = " <<
5019 getColMap ()->getLocalNumElements () <<
".");
5020 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5022 getRowMap ()->getLocalNumElements (), std::runtime_error,
5023 "NO_TRANS case: Y has the wrong number of local rows. "
5025 "getRowMap()->getLocalNumElements() = " <<
5026 getRowMap ()->getLocalNumElements () <<
".");
5027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5029 getRowMap ()->getLocalNumElements (), std::runtime_error,
5030 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5032 <<
" != getRowMap()->getLocalNumElements() = "
5033 << getRowMap ()->getLocalNumElements () <<
".");
5034 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5036 getColMap ()->getLocalNumElements (), std::runtime_error,
5037 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5039 <<
" != getColMap()->getLocalNumElements() = "
5040 << getColMap ()->getLocalNumElements () <<
".");
5041 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5042 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5043 "fill complete. You must call fillComplete() (possibly with "
5044 "domain and range Map arguments) without an intervening "
5045 "resumeFill() call before you may call this method.");
5046 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5048 std::runtime_error,
"X and Y must be constant stride.");
5053 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5054 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5055 && X_lcl.extent(0) != 0,
5056 std::runtime_error,
"X and Y may not alias one another.");
5059 #if KOKKOSKERNELS_VERSION >= 40299
5060 auto A_lcl = getLocalMatrixDevice();
5062 if(!applyHelper.get()) {
5065 bool useMergePath =
false;
5066 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
5072 if constexpr(std::is_same_v<execution_space, Kokkos::Cuda>) {
5073 LocalOrdinal nrows = getLocalNumRows();
5074 LocalOrdinal maxRowImbalance = 0;
5076 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5079 useMergePath =
true;
5082 applyHelper = std::make_shared<ApplyHelper>(A_lcl.nnz(), A_lcl.graph.row_map,
5083 useMergePath ? KokkosSparse::SPMV_MERGE_PATH : KokkosSparse::SPMV_DEFAULT);
5087 const char* modeKK =
nullptr;
5090 case Teuchos::NO_TRANS:
5091 modeKK = KokkosSparse::NoTranspose;
break;
5092 case Teuchos::TRANS:
5093 modeKK = KokkosSparse::Transpose;
break;
5094 case Teuchos::CONJ_TRANS:
5095 modeKK = KokkosSparse::ConjugateTranspose;
break;
5097 throw std::invalid_argument(
"Tpetra::CrsMatrix::localApply: invalid mode");
5100 if(applyHelper->shouldUseIntRowptrs())
5102 auto A_lcl_int_rowptrs = applyHelper->getIntRowptrMatrix(A_lcl);
5104 &applyHelper->handle_int, modeKK,
5110 &applyHelper->handle, modeKK,
5114 LocalOrdinal nrows = getLocalNumRows();
5115 LocalOrdinal maxRowImbalance = 0;
5117 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5119 auto matrix_lcl = getLocalMultiplyOperator();
5121 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5123 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
5127 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5132 Teuchos::ETransp mode,
5137 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5139 TEUCHOS_TEST_FOR_EXCEPTION
5140 (! isFillComplete (), std::runtime_error,
5141 fnName <<
": Cannot call apply() until fillComplete() "
5142 "has been called.");
5144 if (mode == Teuchos::NO_TRANS) {
5145 ProfilingRegion regionNonTranspose (fnName);
5146 this->applyNonTranspose (X, Y, alpha, beta);
5149 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5150 this->applyTranspose (X, Y, mode, alpha, beta);
5155 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5157 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5163 const char tfecfFuncName[] =
"convert: ";
5165 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5166 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5167 "of the conversion) is not fill complete. You must first call "
5168 "fillComplete() (possibly with the domain and range Map) without an "
5169 "intervening call to resumeFill(), before you may call this method.");
5171 RCP<output_matrix_type> newMatrix
5172 (
new output_matrix_type (this->getCrsGraph ()));
5176 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5177 this->getLocalMatrixDevice ().values);
5181 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
5187 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5194 const char tfecfFuncName[] =
"checkInternalState: ";
5195 const char err[] =
"Internal state is not consistent. "
5196 "Please report this bug to the Tpetra developers.";
5200 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5201 (staticGraph_.is_null (), std::logic_error, err);
5205 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5206 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5207 std::logic_error, err);
5209 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5210 (isFillComplete () && ! staticGraph_->isFillComplete (),
5211 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5212 "but its graph is NOT fill complete.");
5215 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5216 (staticGraph_->indicesAreAllocated () &&
5217 staticGraph_->getLocalAllocationSize() > 0 &&
5218 staticGraph_->getLocalNumRows() > 0 &&
5219 valuesUnpacked_wdv.extent (0) == 0,
5220 std::logic_error, err);
5224 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5229 std::ostringstream os;
5231 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
5232 if (this->getObjectLabel () !=
"") {
5233 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5235 if (isFillComplete ()) {
5236 os <<
"isFillComplete: true"
5237 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5238 << getGlobalNumCols () <<
"]"
5239 <<
", global number of entries: " << getGlobalNumEntries ()
5243 os <<
"isFillComplete: false"
5244 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5245 << getGlobalNumCols () <<
"]}";
5250 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5254 const Teuchos::EVerbosityLevel verbLevel)
const
5258 using Teuchos::ArrayView;
5259 using Teuchos::Comm;
5261 using Teuchos::TypeNameTraits;
5262 using Teuchos::VERB_DEFAULT;
5263 using Teuchos::VERB_NONE;
5264 using Teuchos::VERB_LOW;
5265 using Teuchos::VERB_MEDIUM;
5266 using Teuchos::VERB_HIGH;
5267 using Teuchos::VERB_EXTREME;
5269 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5271 if (vl == VERB_NONE) {
5276 Teuchos::OSTab tab0 (out);
5278 RCP<const Comm<int> > comm = this->getComm();
5279 const int myRank = comm->getRank();
5280 const int numProcs = comm->getSize();
5282 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5285 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
5295 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5297 Teuchos::OSTab tab1 (out);
5300 if (this->getObjectLabel () !=
"") {
5301 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5304 out <<
"Template parameters:" << endl;
5305 Teuchos::OSTab tab2 (out);
5306 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5307 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5308 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5309 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
5311 if (isFillComplete()) {
5312 out <<
"isFillComplete: true" << endl
5313 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5314 << getGlobalNumCols () <<
"]" << endl
5315 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5316 << endl <<
"Global max number of entries in a row: "
5317 << getGlobalMaxNumRowEntries () << endl;
5320 out <<
"isFillComplete: false" << endl
5321 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5322 << getGlobalNumCols () <<
"]" << endl;
5326 if (vl < VERB_MEDIUM) {
5332 out << endl <<
"Row Map:" << endl;
5334 if (getRowMap ().is_null ()) {
5336 out <<
"null" << endl;
5343 getRowMap ()->describe (out, vl);
5348 out <<
"Column Map: ";
5350 if (getColMap ().is_null ()) {
5352 out <<
"null" << endl;
5354 }
else if (getColMap () == getRowMap ()) {
5356 out <<
"same as row Map" << endl;
5362 getColMap ()->describe (out, vl);
5367 out <<
"Domain Map: ";
5369 if (getDomainMap ().is_null ()) {
5371 out <<
"null" << endl;
5373 }
else if (getDomainMap () == getRowMap ()) {
5375 out <<
"same as row Map" << endl;
5377 }
else if (getDomainMap () == getColMap ()) {
5379 out <<
"same as column Map" << endl;
5385 getDomainMap ()->describe (out, vl);
5390 out <<
"Range Map: ";
5392 if (getRangeMap ().is_null ()) {
5394 out <<
"null" << endl;
5396 }
else if (getRangeMap () == getDomainMap ()) {
5398 out <<
"same as domain Map" << endl;
5400 }
else if (getRangeMap () == getRowMap ()) {
5402 out <<
"same as row Map" << endl;
5408 getRangeMap ()->describe (out, vl);
5412 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5413 if (myRank == curRank) {
5414 out <<
"Process rank: " << curRank << endl;
5415 Teuchos::OSTab tab2 (out);
5416 if (! staticGraph_->indicesAreAllocated ()) {
5417 out <<
"Graph indices not allocated" << endl;
5420 out <<
"Number of allocated entries: "
5421 << staticGraph_->getLocalAllocationSize () << endl;
5423 out <<
"Number of entries: " << getLocalNumEntries () << endl
5424 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
5433 if (vl < VERB_HIGH) {
5438 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5439 if (myRank == curRank) {
5440 out << std::setw(width) <<
"Proc Rank"
5441 << std::setw(width) <<
"Global Row"
5442 << std::setw(width) <<
"Num Entries";
5443 if (vl == VERB_EXTREME) {
5444 out << std::setw(width) <<
"(Index,Value)";
5447 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5448 const size_t nE = getNumEntriesInLocalRow(r);
5449 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5450 out << std::setw(width) << myRank
5451 << std::setw(width) << gid
5452 << std::setw(width) << nE;
5453 if (vl == VERB_EXTREME) {
5454 if (isGloballyIndexed()) {
5455 global_inds_host_view_type rowinds;
5456 values_host_view_type rowvals;
5457 getGlobalRowView (gid, rowinds, rowvals);
5458 for (
size_t j = 0; j < nE; ++j) {
5459 out <<
" (" << rowinds[j]
5460 <<
", " << rowvals[j]
5464 else if (isLocallyIndexed()) {
5465 local_inds_host_view_type rowinds;
5466 values_host_view_type rowvals;
5467 getLocalRowView (r, rowinds, rowvals);
5468 for (
size_t j=0; j < nE; ++j) {
5469 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5470 <<
", " << rowvals[j]
5486 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5500 return (srcRowMat !=
nullptr);
5503 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5507 const typename crs_graph_type::padding_type& padding,
5513 using LO = local_ordinal_type;
5514 using row_ptrs_type =
5515 typename local_graph_device_type::row_map_type::non_const_type;
5516 using range_policy =
5517 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5518 const char tfecfFuncName[] =
"applyCrsPadding";
5519 const char suffix[] =
5520 ". Please report this bug to the Tpetra developers.";
5521 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5523 std::unique_ptr<std::string> prefix;
5525 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5526 std::ostringstream os;
5527 os << *prefix <<
"padding: ";
5530 std::cerr << os.str();
5532 const int myRank = ! verbose ? -1 : [&] () {
5533 auto map = this->getMap();
5534 if (map.is_null()) {
5537 auto comm = map->getComm();
5538 if (comm.is_null()) {
5541 return comm->getRank();
5545 if (! myGraph_->indicesAreAllocated()) {
5547 std::ostringstream os;
5548 os << *prefix <<
"Call allocateIndices" << endl;
5549 std::cerr << os.str();
5551 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5563 std::ostringstream os;
5564 os << *prefix <<
"Allocate row_ptrs_beg: "
5565 << myGraph_->getRowPtrsUnpackedHost().extent(0) << endl;
5566 std::cerr << os.str();
5568 using Kokkos::view_alloc;
5569 using Kokkos::WithoutInitializing;
5570 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5571 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5573 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
5575 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5576 size_t(row_ptr_beg.extent(0) - 1);
5578 std::ostringstream os;
5579 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5580 std::cerr << os.str();
5582 row_ptrs_type row_ptr_end(
5583 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5585 row_ptrs_type num_row_entries_d;
5587 const bool refill_num_row_entries =
5588 myGraph_->k_numRowEntries_.extent(0) != 0;
5590 if (refill_num_row_entries) {
5593 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5594 myGraph_->k_numRowEntries_);
5595 Kokkos::parallel_for
5596 (
"Fill end row pointers", range_policy(0, N),
5597 KOKKOS_LAMBDA (
const size_t i) {
5598 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
5605 Kokkos::parallel_for
5606 (
"Fill end row pointers", range_policy(0, N),
5607 KOKKOS_LAMBDA (
const size_t i) {
5608 row_ptr_end(i) = row_ptr_beg(i+1);
5612 if (myGraph_->isGloballyIndexed()) {
5614 myGraph_->gblInds_wdv,
5615 valuesUnpacked_wdv, padding, myRank, verbose);
5616 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5617 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5618 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5619 (newValuesLen != newColIndsLen, std::logic_error,
5620 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5621 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5626 myGraph_->lclIndsUnpacked_wdv,
5627 valuesUnpacked_wdv, padding, myRank, verbose);
5628 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5629 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5630 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5631 (newValuesLen != newColIndsLen, std::logic_error,
5632 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5633 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
5637 if (refill_num_row_entries) {
5638 Kokkos::parallel_for
5639 (
"Fill num entries", range_policy(0, N),
5640 KOKKOS_LAMBDA (
const size_t i) {
5641 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5647 std::ostringstream os;
5648 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5649 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5650 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5651 std::cerr << os.str();
5652 TEUCHOS_ASSERT( myGraph_->getRowPtrsUnpackedHost().extent(0) ==
5653 row_ptr_beg.extent(0) );
5655 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
5658 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5660 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5661 copyAndPermuteStaticGraph(
5662 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5663 const size_t numSameIDs,
5664 const LocalOrdinal permuteToLIDs[],
5665 const LocalOrdinal permuteFromLIDs[],
5666 const size_t numPermutes)
5668 using Details::ProfilingRegion;
5669 using Teuchos::Array;
5670 using Teuchos::ArrayView;
5672 using LO = LocalOrdinal;
5673 using GO = GlobalOrdinal;
5674 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5675 const char suffix[] =
5676 " Please report this bug to the Tpetra developers.";
5677 ProfilingRegion regionCAP
5678 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5682 std::unique_ptr<std::string> prefix;
5684 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5685 std::ostringstream os;
5686 os << *prefix <<
"Start" << endl;
5688 const char*
const prefix_raw =
5689 verbose ? prefix.get()->c_str() :
nullptr;
5691 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5696 const map_type& srcRowMap = * (srcMat.getRowMap ());
5697 nonconst_global_inds_host_view_type rowInds;
5698 nonconst_values_host_view_type rowVals;
5699 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5700 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5704 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5705 const GO targetGID = sourceGID;
5707 ArrayView<const GO>rowIndsConstView;
5708 ArrayView<const Scalar> rowValsConstView;
5710 if (sourceIsLocallyIndexed) {
5711 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5712 if (rowLength > static_cast<size_t> (rowInds.size())) {
5713 Kokkos::resize(rowInds,rowLength);
5714 Kokkos::resize(rowVals,rowLength);
5718 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5719 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5724 size_t checkRowLength = 0;
5725 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5726 rowValsView, checkRowLength);
5728 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5729 (rowLength != checkRowLength, std::logic_error,
"For "
5730 "global row index " << sourceGID <<
", the source "
5731 "matrix's getNumEntriesInGlobalRow returns a row length "
5732 "of " << rowLength <<
", but getGlobalRowCopy reports "
5733 "a row length of " << checkRowLength <<
"." << suffix);
5740 rowIndsConstView = Teuchos::ArrayView<const GO> (
5741 rowIndsView.data(), rowIndsView.extent(0),
5742 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5743 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5744 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5745 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5750 global_inds_host_view_type rowIndsView;
5751 values_host_view_type rowValsView;
5752 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5757 rowIndsConstView = Teuchos::ArrayView<const GO> (
5758 rowIndsView.data(), rowIndsView.extent(0),
5759 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5760 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5761 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5762 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5770 combineGlobalValues(targetGID, rowIndsConstView,
5772 prefix_raw, debug, verbose);
5776 std::ostringstream os;
5777 os << *prefix <<
"Do permutes" << endl;
5780 const map_type& tgtRowMap = * (this->getRowMap ());
5781 for (
size_t p = 0; p < numPermutes; ++p) {
5782 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5783 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5785 ArrayView<const GO> rowIndsConstView;
5786 ArrayView<const Scalar> rowValsConstView;
5788 if (sourceIsLocallyIndexed) {
5789 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5790 if (rowLength > static_cast<size_t> (rowInds.size ())) {
5791 Kokkos::resize(rowInds,rowLength);
5792 Kokkos::resize(rowVals,rowLength);
5796 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5797 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5802 size_t checkRowLength = 0;
5803 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5804 rowValsView, checkRowLength);
5806 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5807 (rowLength != checkRowLength, std::logic_error,
"For "
5808 "source matrix global row index " << sourceGID <<
", "
5809 "getNumEntriesInGlobalRow returns a row length of " <<
5810 rowLength <<
", but getGlobalRowCopy a row length of "
5811 << checkRowLength <<
"." << suffix);
5818 rowIndsConstView = Teuchos::ArrayView<const GO> (
5819 rowIndsView.data(), rowIndsView.extent(0),
5820 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5821 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5822 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5823 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5828 global_inds_host_view_type rowIndsView;
5829 values_host_view_type rowValsView;
5830 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5835 rowIndsConstView = Teuchos::ArrayView<const GO> (
5836 rowIndsView.data(), rowIndsView.extent(0),
5837 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5838 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5839 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5840 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5845 combineGlobalValues(targetGID, rowIndsConstView,
5847 prefix_raw, debug, verbose);
5851 std::ostringstream os;
5852 os << *prefix <<
"Done" << endl;
5856 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5858 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5859 copyAndPermuteNonStaticGraph(
5860 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5861 const size_t numSameIDs,
5862 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5863 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5864 const size_t numPermutes)
5866 using Details::ProfilingRegion;
5867 using Teuchos::Array;
5868 using Teuchos::ArrayView;
5870 using LO = LocalOrdinal;
5871 using GO = GlobalOrdinal;
5872 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5873 const char suffix[] =
5874 " Please report this bug to the Tpetra developers.";
5875 ProfilingRegion regionCAP
5876 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5880 std::unique_ptr<std::string> prefix;
5882 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5883 std::ostringstream os;
5884 os << *prefix <<
"Start" << endl;
5886 const char*
const prefix_raw =
5887 verbose ? prefix.get()->c_str() :
nullptr;
5890 using row_graph_type = RowGraph<LO, GO, Node>;
5891 const row_graph_type& srcGraph = *(srcMat.getGraph());
5893 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5894 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5895 applyCrsPadding(*padding, verbose);
5897 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5902 const map_type& srcRowMap = * (srcMat.getRowMap ());
5903 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5904 using gids_type = nonconst_global_inds_host_view_type;
5905 using vals_type = nonconst_values_host_view_type;
5908 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5912 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5913 const GO targetGID = sourceGID;
5915 ArrayView<const GO> rowIndsConstView;
5916 ArrayView<const Scalar> rowValsConstView;
5918 if (sourceIsLocallyIndexed) {
5920 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5921 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5922 Kokkos::resize(rowInds,rowLength);
5923 Kokkos::resize(rowVals,rowLength);
5927 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5928 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5933 size_t checkRowLength = 0;
5934 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5938 (rowLength != checkRowLength, std::logic_error,
": For "
5939 "global row index " << sourceGID <<
", the source "
5940 "matrix's getNumEntriesInGlobalRow returns a row length "
5941 "of " << rowLength <<
", but getGlobalRowCopy reports "
5942 "a row length of " << checkRowLength <<
"." << suffix);
5944 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5945 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
5948 global_inds_host_view_type rowIndsView;
5949 values_host_view_type rowValsView;
5950 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5956 rowIndsConstView = Teuchos::ArrayView<const GO> (
5957 rowIndsView.data(), rowIndsView.extent(0),
5958 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5959 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5960 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5961 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5967 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5968 rowValsConstView, prefix_raw, debug, verbose);
5972 std::ostringstream os;
5973 os << *prefix <<
"Do permutes" << endl;
5975 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5976 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5978 const map_type& tgtRowMap = * (this->getRowMap ());
5979 for (
size_t p = 0; p < numPermutes; ++p) {
5980 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5981 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5983 ArrayView<const GO> rowIndsConstView;
5984 ArrayView<const Scalar> rowValsConstView;
5986 if (sourceIsLocallyIndexed) {
5987 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5988 if (rowLength > static_cast<size_t> (rowInds.extent(0))) {
5989 Kokkos::resize(rowInds,rowLength);
5990 Kokkos::resize(rowVals,rowLength);
5994 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5995 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
6000 size_t checkRowLength = 0;
6001 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6002 rowValsView, checkRowLength);
6004 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6005 (rowLength != checkRowLength, std::logic_error,
"For "
6006 "source matrix global row index " << sourceGID <<
", "
6007 "getNumEntriesInGlobalRow returns a row length of " <<
6008 rowLength <<
", but getGlobalRowCopy a row length of "
6009 << checkRowLength <<
"." << suffix);
6011 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
6012 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
6015 global_inds_host_view_type rowIndsView;
6016 values_host_view_type rowValsView;
6017 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
6023 rowIndsConstView = Teuchos::ArrayView<const GO> (
6024 rowIndsView.data(), rowIndsView.extent(0),
6025 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6026 rowValsConstView = Teuchos::ArrayView<const Scalar> (
6027 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
6028 Teuchos::RCP_DISABLE_NODE_LOOKUP);
6034 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6035 rowValsConstView, prefix_raw, debug, verbose);
6039 std::ostringstream os;
6040 os << *prefix <<
"Done" << endl;
6044 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6049 const size_t numSameIDs,
6050 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6051 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6060 const char tfecfFuncName[] =
"copyAndPermute: ";
6061 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6063 const bool verbose = Behavior::verbose(
"CrsMatrix");
6064 std::unique_ptr<std::string> prefix;
6066 prefix = this->createPrefix(
"CrsMatrix",
"copyAndPermute");
6067 std::ostringstream os;
6068 os << *prefix << endl
6069 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6070 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6079 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6081 std::cerr << os.str ();
6084 const auto numPermute = permuteToLIDs.extent (0);
6085 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6086 (numPermute != permuteFromLIDs.extent (0),
6087 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6088 << numPermute <<
"!= permuteFromLIDs.extent(0) = "
6089 << permuteFromLIDs.extent (0) <<
".");
6094 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
6095 if (isStaticGraph ()) {
6096 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6097 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6098 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6099 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6101 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6102 permuteToLIDs_h.data(),
6103 permuteFromLIDs_h.data(),
6107 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6108 permuteFromLIDs, numPermute);
6112 std::ostringstream os;
6113 os << *prefix <<
"Done" << endl;
6114 std::cerr << os.str();
6118 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6123 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6124 Kokkos::DualView<char*, buffer_device_type>& exports,
6125 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6126 size_t& constantNumPackets)
6131 using Teuchos::outArg;
6132 using Teuchos::REDUCE_MAX;
6133 using Teuchos::reduceAll;
6135 typedef LocalOrdinal LO;
6136 typedef GlobalOrdinal GO;
6137 const char tfecfFuncName[] =
"packAndPrepare: ";
6138 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6140 const bool debug = Behavior::debug(
"CrsMatrix");
6141 const bool verbose = Behavior::verbose(
"CrsMatrix");
6144 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6145 if (pComm.is_null ()) {
6148 const Teuchos::Comm<int>& comm = *pComm;
6149 const int myRank = comm.getSize ();
6151 std::unique_ptr<std::string> prefix;
6153 prefix = this->createPrefix(
"CrsMatrix",
"packAndPrepare");
6154 std::ostringstream os;
6155 os << *prefix <<
"Start" << endl
6165 std::cerr << os.str ();
6188 std::ostringstream msg;
6191 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6192 const crs_matrix_type* srcCrsMat =
6193 dynamic_cast<const crs_matrix_type*
> (&source);
6194 if (srcCrsMat !=
nullptr) {
6196 std::ostringstream os;
6197 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6198 "calling packNew" << endl;
6199 std::cerr << os.str ();
6202 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6203 constantNumPackets);
6205 catch (std::exception& e) {
6207 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6211 using Kokkos::HostSpace;
6212 using Kokkos::subview;
6213 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6214 using range_type = Kokkos::pair<size_t, size_t>;
6217 std::ostringstream os;
6218 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6220 std::cerr << os.str ();
6223 const row_matrix_type* srcRowMat =
6224 dynamic_cast<const row_matrix_type*
> (&source);
6225 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6226 (srcRowMat ==
nullptr, std::invalid_argument,
6227 "The source object of the Import or Export operation is neither a "
6228 "CrsMatrix (with the same template parameters as the target object), "
6229 "nor a RowMatrix (with the same first four template parameters as the "
6240 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6241 auto exportLIDs_h = exportLIDs.view_host ();
6242 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6243 exportLIDs_h.size ());
6247 Teuchos::Array<char> exports_a;
6253 numPacketsPerLID.clear_sync_state ();
6254 numPacketsPerLID.modify_host ();
6255 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6256 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6257 numPacketsPerLID_h.size ());
6262 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6263 constantNumPackets);
6265 catch (std::exception& e) {
6267 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6271 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6272 if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6273 const std::string oldLabel = exports.d_view.label ();
6274 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6275 exports = exports_type (newLabel, newAllocSize);
6280 exports.modify_host();
6282 auto exports_h = exports.view_host ();
6283 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
6287 typedef typename exports_type::t_host::execution_space HES;
6288 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6289 Kokkos::View<const char*, host_device_type>
6290 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
6297 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6301 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6302 "one or more participating processes.");
6306 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6307 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6308 "or more participating processes. Here is this process' error "
6309 "message: " << msg.str ());
6313 std::ostringstream os;
6314 os << *prefix <<
"packAndPrepare: Done!" << endl
6324 std::cerr << os.str ();
6328 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6330 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6331 packRow (
char exports[],
6332 const size_t offset,
6333 const size_t numEnt,
6334 const GlobalOrdinal gidsIn[],
6335 const impl_scalar_type valsIn[],
6336 const size_t numBytesPerValue)
const
6339 using Kokkos::subview;
6341 typedef LocalOrdinal LO;
6342 typedef GlobalOrdinal GO;
6343 typedef impl_scalar_type ST;
6351 const LO numEntLO =
static_cast<size_t> (numEnt);
6353 const size_t numEntBeg = offset;
6355 const size_t gidsBeg = numEntBeg + numEntLen;
6356 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6357 const size_t valsBeg = gidsBeg + gidsLen;
6358 const size_t valsLen = numEnt * numBytesPerValue;
6360 char*
const numEntOut = exports + numEntBeg;
6361 char*
const gidsOut = exports + gidsBeg;
6362 char*
const valsOut = exports + valsBeg;
6364 size_t numBytesOut = 0;
6369 Kokkos::pair<int, size_t> p;
6370 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6371 errorCode += p.first;
6372 numBytesOut += p.second;
6374 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6375 errorCode += p.first;
6376 numBytesOut += p.second;
6379 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6380 TEUCHOS_TEST_FOR_EXCEPTION
6381 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6382 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6383 << expectedNumBytes <<
".");
6384 TEUCHOS_TEST_FOR_EXCEPTION
6385 (errorCode != 0, std::runtime_error,
"packRow: "
6386 "PackTraits::packArray returned a nonzero error code");
6391 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6393 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6394 unpackRow (GlobalOrdinal gidsOut[],
6395 impl_scalar_type valsOut[],
6396 const char imports[],
6397 const size_t offset,
6398 const size_t numBytes,
6399 const size_t numEnt,
6400 const size_t numBytesPerValue)
6403 using Kokkos::subview;
6405 typedef LocalOrdinal LO;
6406 typedef GlobalOrdinal GO;
6407 typedef impl_scalar_type ST;
6409 Details::ProfilingRegion region_upack_row(
6410 "Tpetra::CrsMatrix::unpackRow",
6414 if (numBytes == 0) {
6417 const int myRank = this->getMap ()->getComm ()->getRank ();
6418 TEUCHOS_TEST_FOR_EXCEPTION
6419 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6420 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6421 "number of entries to unpack (as reported by numPacketsPerLID) "
6422 "for this row numEnt=" << numEnt <<
" != 0.");
6427 if (numEnt == 0 && numBytes != 0) {
6428 const int myRank = this->getMap ()->getComm ()->getRank ();
6429 TEUCHOS_TEST_FOR_EXCEPTION
6430 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6431 "unpackRow: The number of entries to unpack (as reported by "
6432 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6433 "numBytes=" << numBytes <<
" != 0.");
6439 const size_t numEntBeg = offset;
6441 const size_t gidsBeg = numEntBeg + numEntLen;
6442 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6443 const size_t valsBeg = gidsBeg + gidsLen;
6444 const size_t valsLen = numEnt * numBytesPerValue;
6446 const char*
const numEntIn = imports + numEntBeg;
6447 const char*
const gidsIn = imports + gidsBeg;
6448 const char*
const valsIn = imports + valsBeg;
6450 size_t numBytesOut = 0;
6454 if (static_cast<size_t> (numEntOut) != numEnt ||
6455 numEntOut == static_cast<LO> (0)) {
6456 const int myRank = this->getMap ()->getComm ()->getRank ();
6457 std::ostringstream os;
6458 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6459 bool firstErrorCondition =
false;
6460 if (static_cast<size_t> (numEntOut) != numEnt) {
6461 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6462 <<
" does not equal number of entries unpacked from imports "
6463 "buffer numEntOut=" << numEntOut <<
".";
6464 firstErrorCondition =
true;
6466 if (numEntOut == static_cast<LO> (0)) {
6467 if (firstErrorCondition) {
6470 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6471 "but number of bytes to unpack for this row numBytes=" << numBytes
6472 <<
" != 0. This should never happen, since packRow should only "
6473 "ever pack rows with a nonzero number of entries. In this case, "
6474 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6477 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
6481 Kokkos::pair<int, size_t> p;
6482 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6483 errorCode += p.first;
6484 numBytesOut += p.second;
6486 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6487 errorCode += p.first;
6488 numBytesOut += p.second;
6491 TEUCHOS_TEST_FOR_EXCEPTION
6492 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6493 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6495 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6496 TEUCHOS_TEST_FOR_EXCEPTION
6497 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6498 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6499 << expectedNumBytes <<
".");
6501 TEUCHOS_TEST_FOR_EXCEPTION
6502 (errorCode != 0, std::runtime_error,
"unpackRow: "
6503 "PackTraits::unpackArray returned a nonzero error code");
6508 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6510 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6511 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6512 size_t& totalNumEntries,
6513 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6515 using Details::Behavior;
6518 typedef impl_scalar_type IST;
6519 typedef LocalOrdinal LO;
6520 typedef GlobalOrdinal GO;
6526 const bool verbose = Behavior::verbose(
"CrsMatrix");
6527 std::unique_ptr<std::string> prefix;
6529 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6530 std::ostringstream os;
6531 os << *prefix <<
"Before:"
6539 std::cerr << os.str ();
6544 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6546 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6547 auto exportLIDs_h = exportLIDs.view_host ();
6550 totalNumEntries = 0;
6551 for (LO i = 0; i < numExportLIDs; ++i) {
6552 const LO lclRow = exportLIDs_h[i];
6553 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
6556 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6559 totalNumEntries += curNumEntries;
6570 const size_t allocSize =
6571 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6572 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
6573 if (static_cast<size_t> (exports.extent (0)) < allocSize) {
6574 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6576 const std::string oldLabel = exports.d_view.label ();
6577 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6578 exports = exports_type (newLabel, allocSize);
6582 std::ostringstream os;
6583 os << *prefix <<
"After:"
6591 std::cerr << os.str ();
6595 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6598 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6599 Kokkos::DualView<char*, buffer_device_type>& exports,
6600 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6601 size_t& constantNumPackets)
const
6605 if (this->isStaticGraph ()) {
6608 constantNumPackets);
6611 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6612 constantNumPackets);
6616 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6619 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6620 Kokkos::DualView<char*, buffer_device_type>& exports,
6621 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6622 size_t& constantNumPackets)
const
6630 using LO = LocalOrdinal;
6631 using GO = GlobalOrdinal;
6632 using ST = impl_scalar_type;
6633 const char tfecfFuncName[] =
"packNonStaticNew: ";
6635 const bool verbose = Behavior::verbose(
"CrsMatrix");
6636 std::unique_ptr<std::string> prefix;
6638 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6639 std::ostringstream os;
6640 os << *prefix <<
"Start" << endl;
6641 std::cerr << os.str ();
6644 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6645 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6646 (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
6647 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6648 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6654 constantNumPackets = 0;
6659 size_t totalNumEntries = 0;
6660 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6661 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
6664 exports.clear_sync_state();
6665 exports.modify_host();
6666 auto exports_h = exports.view_host ();
6668 std::ostringstream os;
6669 os << *prefix <<
"After marking exports as modified on host, "
6671 std::cerr << os.str ();
6675 auto exportLIDs_h = exportLIDs.view_host ();
6678 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6679 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6680 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6685 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
6689 typename global_inds_host_view_type::non_const_type gidsIn_k;
6690 if (this->isLocallyIndexed()) {
6692 typename global_inds_host_view_type::non_const_type(
"packGids",
6697 for (
size_t i = 0; i < numExportLIDs; ++i) {
6698 const LO lclRow = exportLIDs_h[i];
6700 size_t numBytes = 0;
6701 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6708 numPacketsPerLID_h[i] = 0;
6712 if (this->isLocallyIndexed ()) {
6713 typename global_inds_host_view_type::non_const_type gidsIn;
6714 values_host_view_type valsIn;
6718 local_inds_host_view_type lidsIn;
6719 this->getLocalRowView (lclRow, lidsIn, valsIn);
6720 const map_type& colMap = * (this->getColMap ());
6721 for (
size_t k = 0; k < numEnt; ++k) {
6722 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6724 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
6726 const size_t numBytesPerValue =
6727 PackTraits<ST>::packValueCount (valsIn[0]);
6728 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6729 gidsIn.data (), valsIn.data (),
6732 else if (this->isGloballyIndexed ()) {
6733 global_inds_host_view_type gidsIn;
6734 values_host_view_type valsIn;
6740 const map_type& rowMap = * (this->getRowMap ());
6741 const GO gblRow = rowMap.getGlobalElement (lclRow);
6742 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6744 const size_t numBytesPerValue =
6745 PackTraits<ST>::packValueCount (valsIn[0]);
6746 numBytes = this->packRow (exports_h.data (), offset, numEnt,
6747 gidsIn.data (), valsIn.data (),
6754 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6755 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6756 "First invalid offset into 'exports' pack buffer at index i = " << i
6757 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6758 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
6763 numPacketsPerLID_h[i] = numBytes;
6768 std::ostringstream os;
6769 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6776 std::cerr << os.str ();
6780 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6782 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6783 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6784 const LocalOrdinal numEnt,
6785 const impl_scalar_type vals[],
6786 const GlobalOrdinal cols[],
6788 const char*
const prefix,
6792 using GO = GlobalOrdinal;
6796 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
6797 Teuchos::ArrayView<const GO> cols_av
6798 (numEnt == 0 ?
nullptr : cols, numEnt);
6799 Teuchos::ArrayView<const Scalar> vals_av
6800 (numEnt == 0 ?
nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
6805 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6806 prefix, debug, verbose);
6810 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6812 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6813 combineGlobalValues(
6814 const GlobalOrdinal globalRowIndex,
6815 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6816 const Teuchos::ArrayView<const Scalar>& values,
6818 const char*
const prefix,
6822 const char tfecfFuncName[] =
"combineGlobalValues: ";
6824 if (isStaticGraph ()) {
6828 if (combineMode ==
ADD) {
6829 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6831 else if (combineMode ==
REPLACE) {
6832 replaceGlobalValues (globalRowIndex, columnIndices, values);
6834 else if (combineMode ==
ABSMAX) {
6835 using ::Tpetra::Details::AbsMax;
6837 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6841 else if (combineMode ==
INSERT) {
6842 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6843 (isStaticGraph() && combineMode ==
INSERT,
6844 std::invalid_argument,
"INSERT combine mode is forbidden "
6845 "if the matrix has a static (const) graph (i.e., was "
6846 "constructed with the CrsMatrix constructor that takes a "
6847 "const CrsGraph pointer).");
6850 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6851 (
true, std::logic_error,
"Invalid combine mode; should "
6853 "Please report this bug to the Tpetra developers.");
6857 if (combineMode ==
ADD || combineMode ==
INSERT) {
6864 insertGlobalValuesFilteredChecked(globalRowIndex,
6865 columnIndices, values, prefix, debug, verbose);
6876 else if (combineMode ==
ABSMAX) {
6877 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6878 ! isStaticGraph () && combineMode ==
ABSMAX, std::logic_error,
6879 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6882 else if (combineMode ==
REPLACE) {
6883 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6884 ! isStaticGraph () && combineMode ==
REPLACE, std::logic_error,
6885 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6889 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6890 true, std::logic_error,
"Should never get here! Please report this "
6891 "bug to the Tpetra developers.");
6896 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6900 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6901 Kokkos::DualView<char*, buffer_device_type> imports,
6902 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6903 const size_t constantNumPackets,
6910 const char tfecfFuncName[] =
"unpackAndCombine: ";
6911 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6913 const bool debug = Behavior::debug(
"CrsMatrix");
6914 const bool verbose = Behavior::verbose(
"CrsMatrix");
6915 constexpr
int numValidModes = 5;
6918 const char* validModeNames[numValidModes] =
6919 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6921 std::unique_ptr<std::string> prefix;
6923 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6924 std::ostringstream os;
6925 os << *prefix <<
"Start:" << endl
6935 << *prefix <<
" constantNumPackets: " << constantNumPackets
6939 std::cerr << os.str ();
6943 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6944 validModes+numValidModes) {
6945 std::ostringstream os;
6946 os <<
"Invalid combine mode. Valid modes are {";
6947 for (
int k = 0; k < numValidModes; ++k) {
6948 os << validModeNames[k];
6949 if (k < numValidModes - 1) {
6954 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6955 (
true, std::invalid_argument, os.str ());
6957 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6958 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6959 std::invalid_argument,
"importLIDs.extent(0)="
6960 << importLIDs.extent(0)
6961 <<
" != numPacketsPerLID.extent(0)="
6962 << numPacketsPerLID.extent(0) <<
".");
6965 if (combineMode ==
ZERO) {
6970 using Teuchos::reduceAll;
6971 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6974 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6975 constantNumPackets, combineMode,
6977 }
catch (std::exception& e) {
6982 const Teuchos::Comm<int>& comm = * (this->getComm ());
6983 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6984 lclBad, Teuchos::outArg (gblBad));
6990 std::ostringstream os;
6991 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6992 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6993 ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
6994 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6995 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6996 "threw an exception on one or more participating processes: "
6997 << endl << msg->str ());
7001 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7002 constantNumPackets, combineMode,
7007 std::ostringstream os;
7008 os << *prefix <<
"Done!" << endl
7018 std::cerr << os.str ();
7022 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7026 const Kokkos::DualView<
const local_ordinal_type*,
7027 buffer_device_type>& importLIDs,
7028 Kokkos::DualView<char*, buffer_device_type> imports,
7029 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7030 const size_t constantNumPackets,
7035 "Tpetra::CrsMatrix::unpackAndCombineImpl",
7039 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7040 std::unique_ptr<std::string> prefix;
7042 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7043 std::ostringstream os;
7044 os << *prefix <<
"isStaticGraph(): "
7045 << (isStaticGraph() ?
"true" :
"false")
7046 <<
", importLIDs.extent(0): "
7047 << importLIDs.extent(0)
7048 <<
", imports.extent(0): "
7049 << imports.extent(0)
7050 <<
", numPacketsPerLID.extent(0): "
7051 << numPacketsPerLID.extent(0)
7053 std::cerr << os.str();
7056 if (isStaticGraph ()) {
7057 using Details::unpackCrsMatrixAndCombineNew;
7058 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7059 importLIDs, constantNumPackets,
7064 using padding_type =
typename crs_graph_type::padding_type;
7065 std::unique_ptr<padding_type> padding;
7067 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7068 importLIDs, imports, numPacketsPerLID, verbose);
7070 catch (std::exception& e) {
7071 const auto rowMap = getRowMap();
7072 const auto comm = rowMap.is_null() ? Teuchos::null :
7074 const int myRank = comm.is_null() ? -1 : comm->getRank();
7075 TEUCHOS_TEST_FOR_EXCEPTION
7076 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7077 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7078 "threw an exception: " << e.what());
7081 std::ostringstream os;
7082 os << *prefix <<
"Call applyCrsPadding" << endl;
7083 std::cerr << os.str();
7085 applyCrsPadding(*padding, verbose);
7088 std::ostringstream os;
7089 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7090 std::cerr << os.str();
7092 unpackAndCombineImplNonStatic(importLIDs, imports,
7099 std::ostringstream os;
7100 os << *prefix <<
"Done" << endl;
7101 std::cerr << os.str();
7105 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7107 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7108 unpackAndCombineImplNonStatic(
7109 const Kokkos::DualView<
const local_ordinal_type*,
7110 buffer_device_type>& importLIDs,
7111 Kokkos::DualView<char*, buffer_device_type> imports,
7112 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7113 const size_t constantNumPackets,
7117 using Kokkos::subview;
7118 using Kokkos::MemoryUnmanaged;
7119 using Details::Behavior;
7122 using Details::PackTraits;
7123 using Details::ScalarViewTraits;
7125 using LO = LocalOrdinal;
7126 using GO = GlobalOrdinal;
7127 using ST = impl_scalar_type;
7128 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7130 typename View<int*, device_type>::HostMirror::execution_space;
7131 using pair_type = std::pair<typename View<int*, HES>::size_type,
7132 typename View<int*, HES>::size_type>;
7133 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7134 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7135 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7137 const bool debug = Behavior::debug(
"CrsMatrix");
7138 const bool verbose = Behavior::verbose(
"CrsMatrix");
7139 std::unique_ptr<std::string> prefix;
7141 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7142 std::ostringstream os;
7143 os << *prefix << endl;
7144 std::cerr << os.str ();
7146 const char*
const prefix_raw =
7147 verbose ? prefix.get()->c_str() :
nullptr;
7149 const size_type numImportLIDs = importLIDs.extent (0);
7150 if (combineMode ==
ZERO || numImportLIDs == 0) {
7154 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7155 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
7160 if (imports.need_sync_host()) {
7161 imports.sync_host ();
7163 auto imports_h = imports.view_host();
7166 if (numPacketsPerLID.need_sync_host()) {
7167 numPacketsPerLID.sync_host ();
7169 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7171 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7172 auto importLIDs_h = importLIDs.view_host();
7174 size_t numBytesPerValue;
7185 numBytesPerValue = PackTraits<ST>::packValueCount (val);
7190 size_t maxRowNumEnt = 0;
7191 for (size_type i = 0; i < numImportLIDs; ++i) {
7192 const size_t numBytes = numPacketsPerLID_h[i];
7193 if (numBytes == 0) {
7198 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7199 (offset + numBytes >
size_t(imports_h.extent (0)),
7200 std::logic_error,
": At local row index importLIDs_h[i="
7201 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7202 <<
") + numBytes (=" << numBytes <<
") > "
7203 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7208 const size_t theNumBytes =
7210 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7211 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7212 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7214 const char*
const inBuf = imports_h.data () + offset;
7215 const size_t actualNumBytes =
7219 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7220 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7221 <<
", actualNumBytes=" << actualNumBytes
7222 <<
" > numBytes=" << numBytes <<
".");
7223 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7224 (numEntLO == 0, std::logic_error,
": At local row index "
7225 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7226 "the number of entries read from the packed data is "
7227 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7231 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
7239 View<GO*, HES> gblColInds;
7240 View<LO*, HES> lclColInds;
7241 View<ST*, HES> vals;
7254 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7255 gid, maxRowNumEnt,
"gids");
7256 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7257 lid, maxRowNumEnt,
"lids");
7258 vals = ScalarViewTraits<ST, HES>::allocateArray(
7259 val, maxRowNumEnt,
"vals");
7263 for (size_type i = 0; i < numImportLIDs; ++i) {
7264 const size_t numBytes = numPacketsPerLID_h[i];
7265 if (numBytes == 0) {
7269 const char*
const inBuf = imports_h.data () + offset;
7272 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7273 const LO lclRow = importLIDs_h[i];
7275 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7276 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7278 const size_t numBytesOut =
7279 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7280 offset, numBytes, numEnt, numBytesPerValue);
7281 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7282 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7283 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7284 << numBytesOut <<
".");
7286 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7287 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7288 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7289 combineMode, prefix_raw, debug, verbose);
7295 std::ostringstream os;
7296 os << *prefix <<
"Done" << endl;
7297 std::cerr << os.str();
7301 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7302 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7305 const bool force)
const
7307 using Teuchos::null;
7311 TEUCHOS_TEST_FOR_EXCEPTION(
7312 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7313 "MapMultiVector: You may only call this method if the matrix has a "
7314 "column Map. If the matrix does not yet have a column Map, you should "
7315 "first call fillComplete (with domain and range Map if necessary).");
7319 TEUCHOS_TEST_FOR_EXCEPTION(
7320 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7321 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7322 "this matrix's graph is fill complete.");
7325 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7326 RCP<const map_type> colMap = this->getColMap ();
7339 if (! importer.is_null () || force) {
7340 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7341 X_colMap = rcp (
new MV (colMap, numVecs));
7344 importMV_ = X_colMap;
7347 X_colMap = importMV_;
7358 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7359 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7362 const bool force)
const
7364 using Teuchos::null;
7370 TEUCHOS_TEST_FOR_EXCEPTION(
7371 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7372 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7373 "matrix's graph is fill complete.");
7376 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7380 RCP<const map_type> rowMap = this->getRowMap ();
7392 if (! exporter.is_null () || force) {
7393 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7394 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7395 exportMV_ = Y_rowMap;
7398 Y_rowMap = exportMV_;
7404 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7409 TEUCHOS_TEST_FOR_EXCEPTION(
7410 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7411 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7412 "was created with a constant graph (that is, when it was created using "
7413 "the version of its constructor that takes an RCP<const CrsGraph>). "
7414 "This is because the matrix is not allowed to modify the graph in that "
7415 "case, but removing empty processes requires modifying the graph.");
7416 myGraph_->removeEmptyProcessesInPlace (newMap);
7420 this->map_ = this->getRowMap ();
7424 staticGraph_ = Teuchos::rcp_const_cast<
const Graph> (myGraph_);
7427 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7428 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7433 const Teuchos::RCP<const map_type>& domainMap,
7434 const Teuchos::RCP<const map_type>& rangeMap,
7435 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7437 using Teuchos::Array;
7438 using Teuchos::ArrayView;
7439 using Teuchos::ParameterList;
7442 using Teuchos::rcp_implicit_cast;
7443 using Teuchos::sublist;
7447 using crs_matrix_type =
7449 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7453 std::unique_ptr<std::string> prefix;
7455 prefix = this->createPrefix(
"CrsMatrix",
"add");
7456 std::ostringstream os;
7457 os << *prefix <<
"Start" << endl;
7458 std::cerr << os.str ();
7461 const crs_matrix_type& B = *
this;
7462 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7463 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7470 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7471 RCP<const map_type> B_domainMap = B.getDomainMap ();
7472 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7474 RCP<const map_type> theDomainMap = domainMap;
7475 RCP<const map_type> theRangeMap = rangeMap;
7477 if (domainMap.is_null ()) {
7478 if (B_domainMap.is_null ()) {
7479 TEUCHOS_TEST_FOR_EXCEPTION(
7480 A_domainMap.is_null (), std::invalid_argument,
7481 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7482 "then you must supply a nonnull domain Map to this method.");
7483 theDomainMap = A_domainMap;
7485 theDomainMap = B_domainMap;
7488 if (rangeMap.is_null ()) {
7489 if (B_rangeMap.is_null ()) {
7490 TEUCHOS_TEST_FOR_EXCEPTION(
7491 A_rangeMap.is_null (), std::invalid_argument,
7492 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7493 "then you must supply a nonnull range Map to this method.");
7494 theRangeMap = A_rangeMap;
7496 theRangeMap = B_rangeMap;
7504 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7505 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7506 TEUCHOS_TEST_FOR_EXCEPTION
7507 (! B_domainMap->isSameAs(*A_domainMap),
7508 std::invalid_argument,
7509 errPfx <<
"The input RowMatrix A must have a domain Map "
7510 "which is the same as (isSameAs) this RowMatrix's "
7512 TEUCHOS_TEST_FOR_EXCEPTION
7513 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7514 errPfx <<
"The input RowMatrix A must have a range Map "
7515 "which is the same as (isSameAs) this RowMatrix's range "
7517 TEUCHOS_TEST_FOR_EXCEPTION
7518 (! domainMap.is_null() &&
7519 ! domainMap->isSameAs(*B_domainMap),
7520 std::invalid_argument,
7521 errPfx <<
"The input domain Map must be the same as "
7522 "(isSameAs) this RowMatrix's domain Map.");
7523 TEUCHOS_TEST_FOR_EXCEPTION
7524 (! rangeMap.is_null() &&
7525 ! rangeMap->isSameAs(*B_rangeMap),
7526 std::invalid_argument,
7527 errPfx <<
"The input range Map must be the same as "
7528 "(isSameAs) this RowMatrix's range Map.");
7531 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7532 TEUCHOS_TEST_FOR_EXCEPTION
7533 (! domainMap.is_null() &&
7534 ! domainMap->isSameAs(*B_domainMap),
7535 std::invalid_argument,
7536 errPfx <<
"The input domain Map must be the same as "
7537 "(isSameAs) this RowMatrix's domain Map.");
7538 TEUCHOS_TEST_FOR_EXCEPTION
7539 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7540 std::invalid_argument,
7541 errPfx <<
"The input range Map must be the same as "
7542 "(isSameAs) this RowMatrix's range Map.");
7545 TEUCHOS_TEST_FOR_EXCEPTION
7546 (domainMap.is_null() || rangeMap.is_null(),
7547 std::invalid_argument, errPfx <<
"If neither A nor B "
7548 "have a domain and range Map, then you must supply a "
7549 "nonnull domain and range Map to this method.");
7556 bool callFillComplete =
true;
7557 RCP<ParameterList> constructorSublist;
7558 RCP<ParameterList> fillCompleteSublist;
7559 if (! params.is_null()) {
7561 params->get(
"Call fillComplete", callFillComplete);
7562 constructorSublist = sublist(params,
"Constructor parameters");
7563 fillCompleteSublist = sublist(params,
"fillComplete parameters");
7566 RCP<const map_type> A_rowMap = A.
getRowMap ();
7567 RCP<const map_type> B_rowMap = B.getRowMap ();
7568 RCP<const map_type> C_rowMap = B_rowMap;
7569 RCP<crs_matrix_type> C;
7575 if (A_rowMap->isSameAs (*B_rowMap)) {
7576 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7577 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7580 if (alpha != ZERO) {
7581 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7583 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7588 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7589 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7590 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7594 if (constructorSublist.is_null ()) {
7595 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7597 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7598 constructorSublist));
7609 TEUCHOS_TEST_FOR_EXCEPTION
7610 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7611 "be the same for statically allocated matrices, to ensure "
7612 "that there is sufficient space to do the addition.");
7615 TEUCHOS_TEST_FOR_EXCEPTION
7616 (C.is_null (), std::logic_error,
7617 errPfx <<
"C should not be null at this point. "
7618 "Please report this bug to the Tpetra developers.");
7621 std::ostringstream os;
7622 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7623 std::cerr << os.str ();
7625 using gids_type = nonconst_global_inds_host_view_type;
7626 using vals_type = nonconst_values_host_view_type;
7630 if (alpha != ZERO) {
7631 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7632 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7634 const GO globalRow = A_rowMap->getGlobalElement (localRow);
7635 if (A_numEntries > static_cast<size_t> (ind.size ())) {
7636 Kokkos::resize(ind,A_numEntries);
7637 Kokkos::resize(val,A_numEntries);
7639 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7640 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7644 for (
size_t k = 0; k < A_numEntries; ++k) {
7645 valView[k] *= alpha;
7648 C->insertGlobalValues (globalRow, A_numEntries,
7649 reinterpret_cast<Scalar *>(valView.data()),
7655 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7656 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7657 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7658 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7659 if (B_numEntries > static_cast<size_t> (ind.size ())) {
7660 Kokkos::resize(ind,B_numEntries);
7661 Kokkos::resize(val,B_numEntries);
7663 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7664 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7665 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7668 for (
size_t k = 0; k < B_numEntries; ++k) {
7672 C->insertGlobalValues (globalRow, B_numEntries,
7673 reinterpret_cast<Scalar *>(valView.data()),
7678 if (callFillComplete) {
7680 std::ostringstream os;
7681 os << *prefix <<
"Call fillComplete on C" << endl;
7682 std::cerr << os.str ();
7684 if (fillCompleteSublist.is_null ()) {
7685 C->fillComplete (theDomainMap, theRangeMap);
7687 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7691 std::ostringstream os;
7692 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7693 std::cerr << os.str ();
7697 std::ostringstream os;
7698 os << *prefix <<
"Done" << endl;
7699 std::cerr << os.str ();
7706 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7710 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7711 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7712 const Teuchos::RCP<const map_type>& domainMap,
7713 const Teuchos::RCP<const map_type>& rangeMap,
7714 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7721 using Teuchos::ArrayRCP;
7722 using Teuchos::ArrayView;
7723 using Teuchos::Comm;
7724 using Teuchos::ParameterList;
7727 typedef LocalOrdinal LO;
7728 typedef GlobalOrdinal GO;
7729 typedef node_type NT;
7734 const bool debug = Behavior::debug(
"CrsMatrix");
7735 const bool verbose = Behavior::verbose(
"CrsMatrix");
7736 int MyPID = getComm ()->getRank ();
7738 std::unique_ptr<std::string> verbosePrefix;
7741 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7742 std::ostringstream os;
7743 os <<
"Start" << endl;
7744 std::cerr << os.str();
7751 bool reverseMode =
false;
7752 bool restrictComm =
false;
7754 int mm_optimization_core_count =
7755 Behavior::TAFC_OptimizationCoreCount();
7756 RCP<ParameterList> matrixparams;
7757 bool overrideAllreduce =
false;
7758 bool useKokkosPath =
false;
7759 if (! params.is_null ()) {
7760 matrixparams = sublist (params,
"CrsMatrix");
7761 reverseMode = params->get (
"Reverse Mode", reverseMode);
7762 useKokkosPath = params->get (
"TAFC: use kokkos path", useKokkosPath);
7763 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7764 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7765 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7766 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7768 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7769 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7770 if(reverseMode) isMM =
false;
7774 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7776 int reduced_mismatch = 0;
7777 if (isMM && !overrideAllreduce) {
7780 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7781 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7782 rowTransfer.getRemoteLIDs ().size() == 0);
7783 mismatch = (source_vals != target_vals) ? 1 : 0;
7786 Teuchos::REDUCE_MAX, * (getComm ()));
7789 #ifdef HAVE_TPETRA_MMM_TIMINGS
7790 using Teuchos::TimeMonitor;
7792 if(!params.is_null())
7793 label = params->get(
"Timer Label",label);
7794 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7797 std::ostringstream os;
7798 if(isMM) os<<
":MMOpt";
7799 else os<<
":MMLegacy";
7803 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7811 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7812 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7813 TEUCHOS_TEST_FOR_EXCEPTION(
7814 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7815 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7816 "argument must be either an Import or an Export, and its template "
7817 "parameters must match the corresponding template parameters of the "
7825 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<
const import_type> (domainTransfer);
7826 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<
const export_type> (domainTransfer);
7828 if(! domainTransfer.is_null()) {
7829 TEUCHOS_TEST_FOR_EXCEPTION(
7830 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7831 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7832 "argument must be either an Import or an Export, and its template "
7833 "parameters must match the corresponding template parameters of the "
7836 TEUCHOS_TEST_FOR_EXCEPTION(
7837 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7838 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7839 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7840 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7841 "arguments must be of the same type (either Import or Export).");
7843 TEUCHOS_TEST_FOR_EXCEPTION(
7844 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7845 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7846 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7847 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7848 "arguments must be of the same type (either Import or Export).");
7854 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7858 RCP<const map_type> MyRowMap = reverseMode ?
7859 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7860 RCP<const map_type> MyColMap;
7861 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7862 domainMap : getDomainMap ();
7863 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7864 rangeMap : getRangeMap ();
7865 RCP<const map_type> BaseRowMap = MyRowMap;
7866 RCP<const map_type> BaseDomainMap = MyDomainMap;
7874 if (! destMat.is_null ()) {
7885 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7886 ! destMat->getGraph ()->isGloballyIndexed ();
7887 TEUCHOS_TEST_FOR_EXCEPTION(
7888 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7889 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7890 "to be nonnull, if its graph is empty (neither locally nor globally "
7899 TEUCHOS_TEST_FOR_EXCEPTION(
7900 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7901 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7902 "input argument 'destMat' is not the same as the (row) Map specified "
7903 "by the input argument 'rowTransfer'.");
7904 TEUCHOS_TEST_FOR_EXCEPTION(
7905 ! destMat->checkSizes (*
this), std::invalid_argument,
7906 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7907 "destination matrix, but checkSizes() indicates that it is not a legal "
7908 "legal target for redistribution from the source matrix (*this). This "
7909 "may mean that they do not have the same dimensions.");
7923 TEUCHOS_TEST_FOR_EXCEPTION(
7924 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7925 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7926 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7927 TEUCHOS_TEST_FOR_EXCEPTION(
7928 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7929 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7930 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7933 TEUCHOS_TEST_FOR_EXCEPTION(
7934 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7935 std::invalid_argument,
7936 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7937 "argument must be the same as the rebalanced domain map 'domainMap'");
7939 TEUCHOS_TEST_FOR_EXCEPTION(
7940 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7941 std::invalid_argument,
7942 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7943 "argument must be the same as the rebalanced domain map 'domainMap'");
7956 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7957 ArrayView<const LO> ExportLIDs = reverseMode ?
7958 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7959 auto RemoteLIDs = reverseMode ?
7960 rowTransfer.getExportLIDs_dv() : rowTransfer.getRemoteLIDs_dv();
7961 auto PermuteToLIDs = reverseMode ?
7962 rowTransfer.getPermuteFromLIDs_dv() : rowTransfer.getPermuteToLIDs_dv();
7963 auto PermuteFromLIDs = reverseMode ?
7964 rowTransfer.getPermuteToLIDs_dv() : rowTransfer.getPermuteFromLIDs_dv();
7965 Distributor& Distor = rowTransfer.getDistributor ();
7968 Teuchos::Array<int> SourcePids;
7971 RCP<const map_type> ReducedRowMap, ReducedColMap,
7972 ReducedDomainMap, ReducedRangeMap;
7973 RCP<const Comm<int> > ReducedComm;
7977 if (destMat.is_null ()) {
7978 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7985 #ifdef HAVE_TPETRA_MMM_TIMINGS
7986 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrictComm")));
7988 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7989 ReducedComm = ReducedRowMap.is_null () ?
7991 ReducedRowMap->getComm ();
7992 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7994 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7996 MyDomainMap->replaceCommWithSubset (ReducedComm);
7997 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7999 MyRangeMap->replaceCommWithSubset (ReducedComm);
8002 MyRowMap = ReducedRowMap;
8003 MyDomainMap = ReducedDomainMap;
8004 MyRangeMap = ReducedRangeMap;
8007 if (! ReducedComm.is_null ()) {
8008 MyPID = ReducedComm->getRank ();
8015 ReducedComm = MyRowMap->getComm ();
8024 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8027 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8029 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8030 #ifdef HAVE_TPETRA_MMM_TIMINGS
8031 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs same map")));
8039 Import_Util::getPids (*MyImporter, SourcePids,
false);
8041 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8044 #ifdef HAVE_TPETRA_MMM_TIMINGS
8045 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs restricted comm")));
8047 IntVectorType SourceDomain_pids(getDomainMap (),
true);
8048 IntVectorType SourceCol_pids(getColMap());
8050 SourceDomain_pids.putScalar(MyPID);
8052 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8053 SourcePids.resize (getColMap ()->getLocalNumElements ());
8054 SourceCol_pids.get1dCopy (SourcePids ());
8056 else if (MyImporter.is_null ()) {
8058 #ifdef HAVE_TPETRA_MMM_TIMINGS
8059 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs all local entries")));
8061 SourcePids.resize (getColMap ()->getLocalNumElements ());
8062 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8064 else if ( ! MyImporter.is_null () &&
8065 ! domainTransfer.is_null () ) {
8070 #ifdef HAVE_TPETRA_MMM_TIMINGS
8071 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs rectangular case")));
8075 IntVectorType TargetDomain_pids (domainMap);
8076 TargetDomain_pids.putScalar (MyPID);
8079 IntVectorType SourceDomain_pids (getDomainMap ());
8082 IntVectorType SourceCol_pids (getColMap ());
8084 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8085 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8087 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8088 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8090 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8091 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport,
INSERT);
8093 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8094 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport,
INSERT);
8097 TEUCHOS_TEST_FOR_EXCEPTION(
8098 true, std::logic_error,
"Tpetra::CrsMatrix::"
8099 "transferAndFillComplete: Should never get here! "
8100 "Please report this bug to a Tpetra developer.");
8102 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter,
INSERT);
8103 SourcePids.resize (getColMap ()->getLocalNumElements ());
8104 SourceCol_pids.get1dCopy (SourcePids ());
8106 else if ( ! MyImporter.is_null () &&
8107 BaseDomainMap->isSameAs (*BaseRowMap) &&
8108 getDomainMap ()->isSameAs (*getRowMap ())) {
8110 #ifdef HAVE_TPETRA_MMM_TIMINGS
8111 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs query import")));
8114 IntVectorType TargetRow_pids (domainMap);
8115 IntVectorType SourceRow_pids (getRowMap ());
8116 IntVectorType SourceCol_pids (getColMap ());
8118 TargetRow_pids.putScalar (MyPID);
8119 if (! reverseMode && xferAsImport !=
nullptr) {
8120 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport,
INSERT);
8122 else if (reverseMode && xferAsExport !=
nullptr) {
8123 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport,
INSERT);
8125 else if (! reverseMode && xferAsExport !=
nullptr) {
8126 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport,
INSERT);
8128 else if (reverseMode && xferAsImport !=
nullptr) {
8129 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport,
INSERT);
8132 TEUCHOS_TEST_FOR_EXCEPTION(
8133 true, std::logic_error,
"Tpetra::CrsMatrix::"
8134 "transferAndFillComplete: Should never get here! "
8135 "Please report this bug to a Tpetra developer.");
8138 SourceCol_pids.doImport (SourceRow_pids, *MyImporter,
INSERT);
8139 SourcePids.resize (getColMap ()->getLocalNumElements ());
8140 SourceCol_pids.get1dCopy (SourcePids ());
8143 TEUCHOS_TEST_FOR_EXCEPTION(
8144 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8145 "transferAndFillComplete: This method only allows either domainMap == "
8146 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8147 "getDomainMap () == getRowMap ()).");
8151 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8153 #ifdef HAVE_TPETRA_MMM_TIMINGS
8154 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC reallocate buffers")));
8156 if (constantNumPackets == 0) {
8157 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8158 RemoteLIDs.view_host().size ());
8165 const size_t rbufLen = RemoteLIDs.view_host().size() * constantNumPackets;
8166 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8172 #ifdef HAVE_TPETRA_MMM_TIMINGS
8173 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC pack and prepare")));
8176 using Teuchos::outArg;
8177 using Teuchos::REDUCE_MAX;
8178 using Teuchos::reduceAll;
8181 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8182 const int myRank = comm->getRank ();
8184 std::ostringstream errStrm;
8188 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8191 destMat->numExportPacketsPerLID_.modify_host ();
8192 numExportPacketsPerLID =
8195 catch (std::exception& e) {
8196 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8197 << e.what () << std::endl;
8201 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8202 "an exception not a subclass of std::exception" << std::endl;
8206 if (! comm.is_null ()) {
8207 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8210 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8211 TEUCHOS_TEST_FOR_EXCEPTION(
8212 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8213 "exception on at least one process.");
8217 std::ostringstream os;
8218 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8220 std::cerr << os.str ();
8225 numExportPacketsPerLID,
8228 constantNumPackets);
8230 catch (std::exception& e) {
8231 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8232 << e.what () << std::endl;
8236 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8237 "an exception not a subclass of std::exception" << std::endl;
8242 std::ostringstream os;
8243 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8245 std::cerr << os.str ();
8248 if (! comm.is_null ()) {
8249 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8252 ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8253 TEUCHOS_TEST_FOR_EXCEPTION(
8254 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8255 "exception on at least one process.");
8260 destMat->numExportPacketsPerLID_.modify_host ();
8261 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8264 std::ostringstream os;
8265 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8267 std::cerr << os.str ();
8271 numExportPacketsPerLID,
8274 constantNumPackets);
8276 std::ostringstream os;
8277 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8279 std::cerr << os.str ();
8286 #ifdef HAVE_TPETRA_MMM_TIMINGS
8287 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC getOwningPIDs exchange remote data")));
8289 if (! communication_needed) {
8291 std::ostringstream os;
8292 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8293 std::cerr << os.str ();
8298 if (constantNumPackets == 0) {
8300 std::ostringstream os;
8301 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8303 std::cerr << os.str ();
8305 destMat->numExportPacketsPerLID_.sync_device();
8306 auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device();
8307 auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device();
8309 std::ostringstream os;
8310 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8312 std::cerr << os.str ();
8314 Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID);
8316 std::ostringstream os;
8317 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8319 std::cerr << os.str ();
8322 size_t totalImportPackets = Kokkos::Experimental::reduce(
typename Node::execution_space(), numImportPacketsPerLID);
8326 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8327 verbosePrefix.get ());
8328 destMat->imports_.modify_host ();
8329 auto deviceImports = destMat->imports_.view_device();
8330 auto deviceExports = destMat->exports_.view_device();
8332 std::ostringstream os;
8333 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaitsKokkos"
8335 std::cerr << os.str ();
8337 destMat->imports_.sync_device();
8338 Distor.doReversePostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID);
8340 std::ostringstream os;
8341 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaitsKokkos"
8343 std::cerr << os.str ();
8348 std::ostringstream os;
8349 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8351 std::cerr << os.str ();
8353 destMat->imports_.modify_host ();
8354 auto hostImports = destMat->imports_.view_host();
8357 destMat->exports_.sync_host ();
8358 auto hostExports = destMat->exports_.view_host();
8360 std::ostringstream os;
8361 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8363 std::cerr << os.str ();
8365 Distor.doReversePostsAndWaits (hostExports,
8369 std::ostringstream os;
8370 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8372 std::cerr << os.str ();
8377 if (constantNumPackets == 0) {
8379 std::ostringstream os;
8380 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8382 std::cerr << os.str ();
8384 destMat->numExportPacketsPerLID_.sync_device ();
8385 auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device();
8386 auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device();
8388 std::ostringstream os;
8389 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8391 std::cerr << os.str ();
8393 Distor.doPostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID);
8395 std::ostringstream os;
8396 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8398 std::cerr << os.str ();
8401 size_t totalImportPackets = Kokkos::Experimental::reduce(
typename Node::execution_space(), numImportPacketsPerLID);
8405 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8406 verbosePrefix.get ());
8407 destMat->imports_.modify_host ();
8408 auto deviceImports = destMat->imports_.view_device();
8409 auto deviceExports = destMat->exports_.view_device();
8411 std::ostringstream os;
8412 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaitsKokkos"
8414 std::cerr << os.str ();
8416 destMat->imports_.sync_device ();
8417 Distor.doPostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID);
8419 std::ostringstream os;
8420 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaitsKokkos"
8422 std::cerr << os.str ();
8427 std::ostringstream os;
8428 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8430 std::cerr << os.str ();
8432 destMat->imports_.modify_host ();
8433 auto hostImports = destMat->imports_.view_host();
8436 destMat->exports_.sync_host ();
8437 auto hostExports = destMat->exports_.view_host();
8439 std::ostringstream os;
8440 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8442 std::cerr << os.str ();
8444 Distor.doPostsAndWaits (hostExports,
8448 std::ostringstream os;
8449 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8451 std::cerr << os.str ();
8462 bool runOnHost = std::is_same_v<typename device_type::memory_space, Kokkos::HostSpace> && !useKokkosPath;
8464 Teuchos::Array<int> RemotePids;
8466 Teuchos::Array<int> TargetPids;
8468 # ifdef HAVE_TPETRA_MMM_TIMINGS
8469 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8471 ArrayRCP<size_t> CSR_rowptr;
8472 ArrayRCP<GO> CSR_colind_GID;
8473 ArrayRCP<LO> CSR_colind_LID;
8474 ArrayRCP<Scalar> CSR_vals;
8476 destMat->imports_.sync_device ();
8477 destMat->numImportPacketsPerLID_.sync_device ();
8479 size_t N = BaseRowMap->getLocalNumElements ();
8481 auto RemoteLIDs_d = RemoteLIDs.view_device();
8482 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8483 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8488 destMat->imports_.view_device(),
8489 destMat->numImportPacketsPerLID_.view_device(),
8503 if (
typeid (LO) ==
typeid (GO)) {
8504 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8507 CSR_colind_LID.resize (CSR_colind_GID.size());
8509 CSR_colind_LID.resize (CSR_colind_GID.size());
8514 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8516 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8518 #ifdef HAVE_TPETRA_MMM_TIMINGS
8519 tmCopySPRdata = Teuchos::null;
8528 std::ostringstream os;
8529 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8531 std::cerr << os.str ();
8534 #ifdef HAVE_TPETRA_MMM_TIMINGS
8535 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8537 Import_Util::lowCommunicationMakeColMapAndReindexSerial(CSR_rowptr (),
8547 std::ostringstream os;
8548 os << *verbosePrefix <<
"restrictComm="
8549 << (restrictComm ?
"true" :
"false") << std::endl;
8550 std::cerr << os.str ();
8557 #ifdef HAVE_TPETRA_MMM_TIMINGS
8558 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8561 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8563 MyColMap->replaceCommWithSubset (ReducedComm);
8564 MyColMap = ReducedColMap;
8569 std::ostringstream os;
8570 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8571 std::cerr << os.str ();
8573 destMat->replaceColMap (MyColMap);
8580 if (ReducedComm.is_null ()) {
8582 std::ostringstream os;
8583 os << *verbosePrefix <<
"I am no longer in the communicator; "
8584 "returning" << std::endl;
8585 std::cerr << os.str ();
8594 if ((! reverseMode && xferAsImport !=
nullptr) ||
8595 (reverseMode && xferAsExport !=
nullptr)) {
8597 std::ostringstream os;
8598 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8599 std::cerr << os.str ();
8601 #ifdef HAVE_TPETRA_MMM_TIMINGS
8602 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8604 Import_Util::sortCrsEntries (CSR_rowptr(),
8608 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8609 (reverseMode && xferAsImport !=
nullptr)) {
8611 std::ostringstream os;
8612 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8614 std::cerr << os.str();
8616 #ifdef HAVE_TPETRA_MMM_TIMINGS
8617 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8619 Import_Util::sortAndMergeCrsEntries (CSR_rowptr(),
8622 if (CSR_rowptr[N] != static_cast<size_t>(CSR_vals.size())) {
8623 CSR_colind_LID.resize (CSR_rowptr[N]);
8624 CSR_vals.resize (CSR_rowptr[N]);
8628 TEUCHOS_TEST_FOR_EXCEPTION(
8629 true, std::logic_error,
"Tpetra::CrsMatrix::"
8630 "transferAndFillComplete: Should never get here! "
8631 "Please report this bug to a Tpetra developer.");
8638 std::ostringstream os;
8639 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8640 std::cerr << os.str ();
8649 #ifdef HAVE_TPETRA_MMM_TIMINGS
8650 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8652 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8658 # ifdef HAVE_TPETRA_MMM_TIMINGS
8659 RCP<TimeMonitor> tmCopySPRdata = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC unpack-count-resize + copy same-perm-remote data"))));
8661 ArrayRCP<size_t> CSR_rowptr;
8662 ArrayRCP<GO> CSR_colind_GID;
8663 ArrayRCP<LO> CSR_colind_LID;
8664 ArrayRCP<Scalar> CSR_vals;
8666 destMat->imports_.sync_device ();
8667 destMat->numImportPacketsPerLID_.sync_device ();
8669 size_t N = BaseRowMap->getLocalNumElements ();
8671 auto RemoteLIDs_d = RemoteLIDs.view_device();
8672 auto PermuteToLIDs_d = PermuteToLIDs.view_device();
8673 auto PermuteFromLIDs_d = PermuteFromLIDs.view_device();
8675 Kokkos::View<size_t*,device_type> CSR_rowptr_d;
8676 Kokkos::View<GO*,device_type> CSR_colind_GID_d;
8677 Kokkos::View<LO*,device_type> CSR_colind_LID_d;
8678 Kokkos::View<impl_scalar_type*,device_type> CSR_vals_d;
8679 Kokkos::View<int*,device_type> TargetPids_d;
8684 destMat->imports_.view_device(),
8685 destMat->numImportPacketsPerLID_.view_device(),
8697 Kokkos::resize (CSR_colind_LID_d, CSR_colind_GID_d.size());
8699 #ifdef HAVE_TPETRA_MMM_TIMINGS
8700 tmCopySPRdata = Teuchos::null;
8709 std::ostringstream os;
8710 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8712 std::cerr << os.str ();
8715 #ifdef HAVE_TPETRA_MMM_TIMINGS
8716 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC makeColMap")));
8718 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr_d,
8728 std::ostringstream os;
8729 os << *verbosePrefix <<
"restrictComm="
8730 << (restrictComm ?
"true" :
"false") << std::endl;
8731 std::cerr << os.str ();
8738 #ifdef HAVE_TPETRA_MMM_TIMINGS
8739 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC restrict colmap")));
8742 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8744 MyColMap->replaceCommWithSubset (ReducedComm);
8745 MyColMap = ReducedColMap;
8750 std::ostringstream os;
8751 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8752 std::cerr << os.str ();
8754 destMat->replaceColMap (MyColMap);
8761 if (ReducedComm.is_null ()) {
8763 std::ostringstream os;
8764 os << *verbosePrefix <<
"I am no longer in the communicator; "
8765 "returning" << std::endl;
8766 std::cerr << os.str ();
8776 if ((! reverseMode && xferAsImport !=
nullptr) ||
8777 (reverseMode && xferAsExport !=
nullptr)) {
8779 std::ostringstream os;
8780 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8781 std::cerr << os.str ();
8783 #ifdef HAVE_TPETRA_MMM_TIMINGS
8784 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortCrsEntries")));
8786 Import_Util::sortCrsEntries (CSR_rowptr_d,
8790 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8791 (reverseMode && xferAsImport !=
nullptr)) {
8793 std::ostringstream os;
8794 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8796 std::cerr << os.str();
8798 #ifdef HAVE_TPETRA_MMM_TIMINGS
8799 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC sortAndMergeCrsEntries")));
8801 Import_Util::sortAndMergeCrsEntries (CSR_rowptr_d,
8806 TEUCHOS_TEST_FOR_EXCEPTION(
8807 true, std::logic_error,
"Tpetra::CrsMatrix::"
8808 "transferAndFillComplete: Should never get here! "
8809 "Please report this bug to a Tpetra developer.");
8817 std::ostringstream os;
8818 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8819 std::cerr << os.str ();
8823 #ifdef HAVE_TPETRA_MMM_TIMINGS
8824 Teuchos::TimeMonitor MMrc(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC setAllValues")));
8826 destMat->setAllValues (CSR_rowptr_d, CSR_colind_LID_d, CSR_vals_d);
8834 #ifdef HAVE_TPETRA_MMM_TIMINGS
8835 RCP<TimeMonitor> tmIESFC = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC build importer and esfc"))));
8838 Teuchos::ParameterList esfc_params;
8840 RCP<import_type> MyImport;
8843 if (iallreduceRequest.get () !=
nullptr) {
8845 std::ostringstream os;
8846 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8848 std::cerr << os.str ();
8850 iallreduceRequest->wait ();
8851 if (reduced_mismatch != 0) {
8857 #ifdef HAVE_TPETRA_MMM_TIMINGS
8858 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8863 std::ostringstream os;
8864 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8865 std::cerr << os.str ();
8868 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8869 Teuchos::ArrayRCP<int> type3PIDs;
8870 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8871 auto colind = getCrsGraph()->getLocalIndicesHost();
8874 std::ostringstream os;
8875 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8876 std::cerr << os.str ();
8880 #ifdef HAVE_TPETRA_MMM_TIMINGS
8881 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8883 Import_Util::reverseNeighborDiscovery(*
this,
8895 std::ostringstream os;
8896 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8897 std::cerr << os.str ();
8900 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8901 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8903 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8904 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8906 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8908 std::vector<bool> IsOwned(numCols,
true);
8909 std::vector<int> SentTo(numCols,-1);
8910 if (! MyImporter.is_null ()) {
8911 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8912 IsOwned[rlid]=
false;
8916 std::vector<std::pair<int,GO> > usrtg;
8917 usrtg.reserve(TEPID2.size());
8920 const auto& colMap = * (this->getColMap ());
8922 const LO row = TELID2[i];
8923 const int pid = TEPID2[i];
8924 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8925 const int col = colind[j];
8926 if (IsOwned[col] && SentTo[col] != pid) {
8928 GO gid = colMap.getGlobalElement (col);
8929 usrtg.push_back (std::pair<int,GO> (pid, gid));
8937 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8939 usrtg.erase(eopg,usrtg.end());
8942 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8943 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8946 for(
auto && p : usrtg) {
8947 EPID2[pos]= p.first;
8948 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8952 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8953 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8954 GO InfGID = std::numeric_limits<GO>::max();
8955 int InfPID = INT_MAX;
8958 #endif // TPETRA_MIN3
8959 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
8960 int i1=0, i2=0, i3=0;
8961 int Len1 = EPID1.size();
8962 int Len2 = EPID2.size();
8963 int Len3 = EPID3.size();
8965 int MyLen=Len1+Len2+Len3;
8966 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
8967 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
8970 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
8971 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
8972 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
8973 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
8975 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
8976 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
8977 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
8979 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
8980 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
8983 #endif // TPETRA_MIN3
8984 bool added_entry=
false;
8986 if(PID1 == MIN_PID && GID1 == MIN_GID){
8987 userExportLIDs[iloc]=ELID1[i1];
8988 userExportPIDs[iloc]=EPID1[i1];
8993 if(PID2 == MIN_PID && GID2 == MIN_GID){
8995 userExportLIDs[iloc]=ELID2[i2];
8996 userExportPIDs[iloc]=EPID2[i2];
9002 if(PID3 == MIN_PID && GID3 == MIN_GID){
9004 userExportLIDs[iloc]=ELID3[i3];
9005 userExportPIDs[iloc]=EPID3[i3];
9013 std::ostringstream os;
9014 os << *verbosePrefix <<
"Create Import" << std::endl;
9015 std::cerr << os.str ();
9018 #ifdef HAVE_TPETRA_MMM_TIMINGS
9019 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
9021 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
9023 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9024 MyImport = rcp (
new import_type (MyDomainMap,
9027 userExportLIDs.view(0,iloc).getConst(),
9028 userExportPIDs.view(0,iloc).getConst(),
9033 std::ostringstream os;
9034 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
9035 std::cerr << os.str ();
9039 #ifdef HAVE_TPETRA_MMM_TIMINGS
9040 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
9041 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
9043 if(!params.is_null())
9044 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
9045 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
9051 #ifdef HAVE_TPETRA_MMM_TIMINGS
9052 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
9055 std::ostringstream os;
9056 os << *verbosePrefix <<
"Create Import" << std::endl;
9057 std::cerr << os.str ();
9060 #ifdef HAVE_TPETRA_MMM_TIMINGS
9061 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
9063 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
9064 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
9065 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9066 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9069 std::ostringstream os;
9070 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
9071 std::cerr << os.str ();
9074 #ifdef HAVE_TPETRA_MMM_TIMINGS
9075 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
9076 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
9078 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
9081 if (!params.is_null ()) {
9082 esfc_params.set (
"compute global constants",
9083 params->get (
"compute global constants",
true));
9085 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9086 MyImport, Teuchos::null,
9087 rcp (
new Teuchos::ParameterList (esfc_params)));
9090 #ifdef HAVE_TPETRA_MMM_TIMINGS
9091 tmIESFC = Teuchos::null;
9095 std::ostringstream os;
9096 os << *verbosePrefix <<
"Done" << endl;
9097 std::cerr << os.str ();
9102 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9107 const Teuchos::RCP<const map_type>& domainMap,
9108 const Teuchos::RCP<const map_type>& rangeMap,
9109 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9111 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
9114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9120 const Teuchos::RCP<const map_type>& domainMap,
9121 const Teuchos::RCP<const map_type>& rangeMap,
9122 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9124 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
9127 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9132 const Teuchos::RCP<const map_type>& domainMap,
9133 const Teuchos::RCP<const map_type>& rangeMap,
9134 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9136 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
9139 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
9145 const Teuchos::RCP<const map_type>& domainMap,
9146 const Teuchos::RCP<const map_type>& rangeMap,
9147 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
9149 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
9160 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9162 template class CrsMatrix< SCALAR , LO , GO , NODE >;
9164 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9166 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9167 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
9169 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9171 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9172 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9173 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9174 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9175 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9176 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9177 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9178 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9179 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9180 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9181 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9182 const Teuchos::RCP<Teuchos::ParameterList>& params);
9184 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9186 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9187 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9188 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9189 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9190 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9191 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9192 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9193 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9194 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9195 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9196 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9197 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9198 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9199 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9200 const Teuchos::RCP<Teuchos::ParameterList>& params);
9203 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9205 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9206 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9207 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9208 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9209 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9210 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9211 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9212 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9213 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9214 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9215 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9216 const Teuchos::RCP<Teuchos::ParameterList>& params);
9218 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9220 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9221 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9222 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9223 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9224 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9225 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9226 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9227 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9228 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9229 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9230 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9231 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9232 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9233 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9234 const Teuchos::RCP<Teuchos::ParameterList>& params);
9237 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9238 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9239 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9240 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9241 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9242 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9244 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual LocalOrdinal replaceGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceGlobalValues.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
static size_t mergeRowIndicesAndValues(size_t rowLen, local_ordinal_type *cols, impl_scalar_type *vals)
Merge duplicate row indices in the given row, along with their corresponding values.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets) const
Pack this object's data for an Import or Export.
virtual void insertGlobalValuesImpl(crs_graph_type &graph, RowInfo &rowInfo, const GlobalOrdinal gblColInds[], const impl_scalar_type vals[], const size_t numInputEnt)
Common implementation detail of insertGlobalValues and insertGlobalValuesFiltered.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given objects.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > ¶ms)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > ¶ms)
Fill data into the local matrix.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual LocalOrdinal sumIntoLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoLocalValues.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
values_dualv_type::t_dev::const_type getValuesViewDevice(const RowInfo &rowinfo) const
Get a const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow)...
bool isFillComplete() const override
Whether the matrix is fill complete.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror num_row_entries_type
Row offsets for "1-D" storage.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2, const bool stableSort=false)
Sort the first array, and apply the resulting permutation to the second array.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using global indices.
Replace old value with maximum of magnitudes of old and new values.
virtual LocalOrdinal sumIntoGlobalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const GlobalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts, const bool atomic=useAtomicUpdatesByDefault)
Implementation detail of sumIntoGlobalValues.
Abstract base class for objects that can be the source of an Import or Export operation.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using local row and column indices.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Replace old values with zero.
const row_ptrs_host_view_type & getRowPtrsUnpackedHost() const
Get the unpacked row pointers on host. Lazily make a copy from device.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
virtual LocalOrdinal replaceLocalValuesImpl(impl_scalar_type rowVals[], const crs_graph_type &graph, const RowInfo &rowInfo, const LocalOrdinal inds[], const impl_scalar_type newVals[], const LocalOrdinal numElts)
Implementation detail of replaceLocalValues.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
values_dualv_type::t_dev getValuesViewDeviceNonConst(const RowInfo &rowinfo)
Get a non-const Device view of the locally owned values row myRow, such that rowinfo = getRowInfo(myR...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the LocalMap for Kokkos-Kernels.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
A parallel distribution of indices over processes.
void getLocalRowCopy(LocalOrdinal LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const CombineMode CM=ADD)
Insert one or more entries into the matrix, using local column indices.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant view of a row of this matrix, using local row and column indices.
values_dualv_type::t_host::const_type getValuesViewHost(const RowInfo &rowinfo) const
Get a const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow).
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void setAllValues(const typename local_graph_device_type::row_map_type &ptr, const typename local_graph_device_type::entries_type::non_const_type &ind, const typename local_matrix_device_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
Replace one or more entries' values, using local row and column indices.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given objects.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrixDevice() that supports local matrix-vect...
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
Accumulate new values into existing values (may not be supported in all classes)
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
virtual void copyAndPermute(const SrcDistObject &source, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
values_dualv_type::t_host getValuesViewHostNonConst(const RowInfo &rowinfo)
Get a non-const Host view of the locally owned values row myRow, such that rowinfo = getRowInfo(myRow...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.