Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_CrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIX_DEF_HPP
43 #define TPETRA_CRSMATRIX_DEF_HPP
44 
52 
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_Import_Util.hpp"
55 #include "Tpetra_Import_Util2.hpp"
56 
62 #include "Tpetra_Details_gathervPrint.hpp"
63 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
64 #include "Tpetra_Details_leftScaleLocalCrsMatrix.hpp"
66 #include "Tpetra_Details_rightScaleLocalCrsMatrix.hpp"
67 #include "KokkosSparse_getDiagCopy.hpp"
68 #include "Tpetra_Details_copyConvert.hpp"
71 #include "Tpetra_Details_packCrsMatrix.hpp"
72 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
73 #include "Teuchos_FancyOStream.hpp"
74 #include "Teuchos_RCP.hpp"
75 #include "Teuchos_SerialDenseMatrix.hpp" // unused here, could delete
76 #include <memory>
77 #include <sstream>
78 #include <typeinfo>
79 #include <utility>
80 #include <vector>
81 
82 using Teuchos::rcpFromRef;
83 
84 namespace Tpetra {
85 
86 namespace { // (anonymous)
87 
88  template<class T, class BinaryFunction>
89  T atomic_binary_function_update (volatile T* const dest,
90  const T& inputVal,
91  BinaryFunction f)
92  {
93  T oldVal = *dest;
94  T assume;
95 
96  // NOTE (mfh 30 Nov 2015) I do NOT need a fence here for IBM
97  // POWER architectures, because 'newval' depends on 'assume',
98  // which depends on 'oldVal', which depends on '*dest'. This
99  // sets up a chain of read dependencies that should ensure
100  // correct behavior given a sane memory model.
101  do {
102  assume = oldVal;
103  T newVal = f (assume, inputVal);
104  oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
105  } while (assume != oldVal);
106 
107  return oldVal;
108  }
109 } // namespace (anonymous)
110 
111 //
112 // Users must never rely on anything in the Details namespace.
113 //
114 namespace Details {
115 
125 template<class Scalar>
126 struct AbsMax {
128  Scalar operator() (const Scalar& x, const Scalar& y) {
129  typedef Teuchos::ScalarTraits<Scalar> STS;
130  return std::max (STS::magnitude (x), STS::magnitude (y));
131  }
132 };
133 
134 } // namespace Details
135 } // namespace Tpetra
136 
137 namespace Tpetra {
138 
139  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
140  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
141  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
142  size_t maxNumEntriesPerRow,
143  const ProfileType pftype,
144  const Teuchos::RCP<Teuchos::ParameterList>& params) :
145  dist_object_type (rowMap),
146  storageStatus_ (pftype == StaticProfile ?
147  ::Tpetra::Details::STORAGE_1D_UNPACKED :
148  ::Tpetra::Details::STORAGE_2D),
149  fillComplete_ (false),
150  frobNorm_ (-STM::one ())
151  {
152  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, size_t, "
153  "ProfileType[, RCP<ParameterList>]): ";
154  Teuchos::RCP<crs_graph_type> graph;
155  try {
156  graph = Teuchos::rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
157  pftype, params));
158  }
159  catch (std::exception& e) {
160  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
161  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
162  "size_t, ProfileType[, RCP<ParameterList>]) threw an exception: "
163  << e.what ());
164  }
165  // myGraph_ not null means that the matrix owns the graph. That's
166  // different than the const CrsGraph constructor, where the matrix
167  // does _not_ own the graph.
168  myGraph_ = graph;
169  staticGraph_ = myGraph_;
170  resumeFill (params);
172  }
173 
174  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
176  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
177  const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
178  const ProfileType pftype,
179  const Teuchos::RCP<Teuchos::ParameterList>& params) :
180  dist_object_type (rowMap),
181  storageStatus_ (pftype == StaticProfile ?
182  ::Tpetra::Details::STORAGE_1D_UNPACKED :
183  ::Tpetra::Details::STORAGE_2D),
184  fillComplete_ (false),
185  frobNorm_ (-STM::one ())
186  {
187  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
188  "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]): ";
189  Teuchos::RCP<crs_graph_type> graph;
190  try {
191  graph = Teuchos::rcp (new crs_graph_type (rowMap, numEntPerRowToAlloc,
192  pftype, params));
193  }
194  catch (std::exception &e) {
195  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
196  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
197  "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]) threw "
198  "an exception: " << e.what ());
199  }
200  // myGraph_ not null means that the matrix owns the graph. That's
201  // different than the const CrsGraph constructor, where the matrix
202  // does _not_ own the graph.
203  myGraph_ = graph;
204  staticGraph_ = graph;
205  resumeFill (params);
207  }
208 
209 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
210  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
212  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
213  const Teuchos::ArrayRCP<const size_t>& numEntPerRowToAlloc,
214  const ProfileType pftype,
215  const Teuchos::RCP<Teuchos::ParameterList>& params) :
216  dist_object_type (rowMap),
217  storageStatus_ (pftype == StaticProfile ?
218  ::Tpetra::Details::STORAGE_1D_UNPACKED :
219  ::Tpetra::Details::STORAGE_2D),
220  fillComplete_ (false),
221  frobNorm_ (-STM::one ())
222  {
223  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
224  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
225  Teuchos::RCP<crs_graph_type> graph;
226  try {
227  graph = Teuchos::rcp (new crs_graph_type (rowMap, numEntPerRowToAlloc (),
228  pftype, params));
229  }
230  catch (std::exception &e) {
231  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
232  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
233  "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]) threw "
234  "an exception: " << e.what ());
235  }
236  // myGraph_ not null means that the matrix owns the graph. That's
237  // different than the const CrsGraph constructor, where the matrix
238  // does _not_ own the graph.
239  myGraph_ = graph;
240  staticGraph_ = graph;
241  resumeFill (params);
243  }
244 #endif // TPETRA_ENABLE_DEPRECATED_CODE
245 
246  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
247  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
248  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
249  const Teuchos::RCP<const map_type>& colMap,
250  const size_t maxNumEntPerRow,
251  const ProfileType pftype,
252  const Teuchos::RCP<Teuchos::ParameterList>& params) :
253  dist_object_type (rowMap),
254  storageStatus_ (pftype == StaticProfile ?
255  ::Tpetra::Details::STORAGE_1D_UNPACKED :
256  ::Tpetra::Details::STORAGE_2D),
257  fillComplete_ (false),
258  frobNorm_ (-STM::one ())
259  {
260  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
261  "size_t, ProfileType[, RCP<ParameterList>]): ";
262 
263 #ifdef HAVE_TPETRA_DEBUG
264  // An artifact of debugging something a while back.
265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
266  (! staticGraph_.is_null (), std::logic_error,
267  "staticGraph_ is not null at the beginning of the constructor. "
268  "Please report this bug to the Tpetra developers.");
269  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
270  (! myGraph_.is_null (), std::logic_error,
271  "myGraph_ is not null at the beginning of the constructor. "
272  "Please report this bug to the Tpetra developers.");
273 #endif // HAVE_TPETRA_DEBUG
274 
275  Teuchos::RCP<crs_graph_type> graph;
276  try {
277  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
278  maxNumEntPerRow,
279  pftype, params));
280  }
281  catch (std::exception &e) {
282  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
283  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
284  "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]) threw an "
285  "exception: " << e.what ());
286  }
287  // myGraph_ not null means that the matrix owns the graph. That's
288  // different than the const CrsGraph constructor, where the matrix
289  // does _not_ own the graph.
290  myGraph_ = graph;
291  staticGraph_ = myGraph_;
292  resumeFill (params);
294  }
295 
296  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
298  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
299  const Teuchos::RCP<const map_type>& colMap,
300  const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
301  const ProfileType pftype,
302  const Teuchos::RCP<Teuchos::ParameterList>& params) :
303  dist_object_type (rowMap),
304  storageStatus_ (pftype == StaticProfile ?
305  ::Tpetra::Details::STORAGE_1D_UNPACKED :
306  ::Tpetra::Details::STORAGE_2D),
307  fillComplete_ (false),
308  frobNorm_ (-STM::one ())
309  {
310  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
311  "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]): ";
312  Teuchos::RCP<crs_graph_type> graph;
313  try {
314  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
315  numEntPerRowToAlloc,
316  pftype, params));
317  }
318  catch (std::exception &e) {
319  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
320  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
321  "RCP<const Map>, ArrayView<const size_t>, ProfileType[, "
322  "RCP<ParameterList>]) threw an exception: " << e.what ());
323  }
324  // myGraph_ not null means that the matrix owns the graph. That's
325  // different than the const CrsGraph constructor, where the matrix
326  // does _not_ own the graph.
327  myGraph_ = graph;
328  staticGraph_ = graph;
329  resumeFill (params);
331  }
332 
333 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
334  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
336  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
337  const Teuchos::RCP<const map_type>& colMap,
338  const Teuchos::ArrayRCP<const size_t>& numEntPerRowToAlloc,
339  const ProfileType pftype,
340  const Teuchos::RCP<Teuchos::ParameterList>& params) :
341  dist_object_type (rowMap),
342  storageStatus_ (pftype == StaticProfile ?
343  ::Tpetra::Details::STORAGE_1D_UNPACKED :
344  ::Tpetra::Details::STORAGE_2D),
345  fillComplete_ (false),
346  frobNorm_ (-STM::one ())
347  {
348  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
349  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
350  Teuchos::RCP<crs_graph_type> graph;
351  try {
352  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
353  numEntPerRowToAlloc (),
354  pftype, params));
355  }
356  catch (std::exception &e) {
357  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
358  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
359  "RCP<const Map>, ArrayView<const size_t>, ProfileType[, "
360  "RCP<ParameterList>]) threw an exception: " << e.what ());
361  }
362  // myGraph_ not null means that the matrix owns the graph. That's
363  // different than the const CrsGraph constructor, where the matrix
364  // does _not_ own the graph.
365  myGraph_ = graph;
366  staticGraph_ = graph;
367  resumeFill (params);
369  }
370 #endif // TPETRA_ENABLE_DEPRECATED_CODE
371 
372  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
373  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
374  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
375  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
376  dist_object_type (graph->getRowMap ()),
377  staticGraph_ (graph),
378  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
379  fillComplete_ (false),
380  frobNorm_ (-STM::one ())
381  {
382  typedef typename local_matrix_type::values_type values_type;
383  const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>[, "
384  "RCP<ParameterList>]): ";
385  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
386  (graph.is_null (), std::runtime_error, "Input graph is null.");
387  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
388  (! graph->isFillComplete (), std::runtime_error, "Input graph is not "
389  "fill complete. You must call fillComplete on the graph before using "
390  "it to construct a CrsMatrix. Note that calling resumeFill on the "
391  "graph makes it not fill complete, even if you had previously called "
392  "fillComplete. In that case, you must call fillComplete on the graph "
393  "again.");
394 
395  // The graph is fill complete, so it is locally indexed and has a
396  // fixed structure. This means we can allocate the (1-D) array of
397  // values and build the local matrix right now. Note that the
398  // local matrix's number of columns comes from the column Map, not
399  // the domain Map.
400 
401  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
402  auto lclGraph = graph->getLocalGraph ();
403  const size_t numEnt = lclGraph.entries.extent (0);
404  values_type val ("Tpetra::CrsMatrix::val", numEnt);
405 
406  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
407  numCols, val, lclGraph);
408  // FIXME (22 Jun 2016) I would very much like to get rid of
409  // k_values1D_ at some point. I find it confusing to have all
410  // these extra references lying around.
411  this->k_values1D_ = this->lclMatrix_.values;
412 
414  }
415 
416  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
418  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
419  const typename local_matrix_type::values_type& values,
420  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
421  dist_object_type (graph->getRowMap ()),
422  staticGraph_ (graph),
423  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
424  fillComplete_ (false),
425  frobNorm_ (-STM::one ())
426  {
427  const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>,local_matrix_type::values_type,[, "
428  "RCP<ParameterList>]): ";
429  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
430  (graph.is_null (), std::runtime_error, "Input graph is null.");
431  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
432  (! graph->isFillComplete (), std::runtime_error, "Input graph is not "
433  "fill complete. You must call fillComplete on the graph before using "
434  "it to construct a CrsMatrix. Note that calling resumeFill on the "
435  "graph makes it not fill complete, even if you had previously called "
436  "fillComplete. In that case, you must call fillComplete on the graph "
437  "again.");
438 
439  // The graph is fill complete, so it is locally indexed and has a
440  // fixed structure. This means we can allocate the (1-D) array of
441  // values and build the local matrix right now. Note that the
442  // local matrix's number of columns comes from the column Map, not
443  // the domain Map.
444 
445  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
446  auto lclGraph = graph->getLocalGraph ();
447  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
448  numCols, values, lclGraph);
449  // FIXME (22 Jun 2016) I would very much like to get rid of
450  // k_values1D_ at some point. I find it confusing to have all
451  // these extra references lying around.
452  this->k_values1D_ = this->lclMatrix_.values;
453 
455  }
456 
457 
458 
459  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
461  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
462  const Teuchos::RCP<const map_type>& colMap,
463  const typename local_matrix_type::row_map_type& rowPointers,
464  const typename local_graph_type::entries_type::non_const_type& columnIndices,
465  const typename local_matrix_type::values_type& values,
466  const Teuchos::RCP<Teuchos::ParameterList>& params) :
467  dist_object_type (rowMap),
468  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
469  fillComplete_ (false),
470  frobNorm_ (-STM::one ())
471  {
472  using Teuchos::RCP;
473  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
474  "RCP<const Map>, ptr, ind, val[, params]): ";
475  const char suffix[] = ". Please report this bug to the Tpetra developers.";
476 
477  // Check the user's input. Note that this might throw only on
478  // some processes but not others, causing deadlock. We prefer
479  // deadlock due to exceptions to segfaults, because users can
480  // catch exceptions.
481  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
482  (values.extent (0) != columnIndices.extent (0),
483  std::invalid_argument, "Input arrays don't have matching dimensions. "
484  "values.extent(0) = " << values.extent (0) << " != "
485  "columnIndices.extent(0) = " << columnIndices.extent (0) << ".");
486 #ifdef HAVE_TPETRA_DEBUG
487  if (rowPointers.extent (0) != 0) {
488  const size_t numEnt =
489  ::Tpetra::Details::getEntryOnHost (rowPointers, rowPointers.extent (0) - 1);
490  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
491  (numEnt != static_cast<size_t> (columnIndices.extent (0)) ||
492  numEnt != static_cast<size_t> (values.extent (0)),
493  std::invalid_argument, "Last entry of rowPointers says that the matrix"
494  " has " << numEnt << " entr" << (numEnt != 1 ? "ies" : "y") << ", but "
495  "the dimensions of columnIndices and values don't match this. "
496  "columnIndices.extent(0) = " << columnIndices.extent (0) <<
497  " and values.extent(0) = " << values.extent (0) << ".");
498  }
499 #endif // HAVE_TPETRA_DEBUG
500 
501  RCP<crs_graph_type> graph;
502  try {
503  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, rowPointers,
504  columnIndices, params));
505  }
506  catch (std::exception& e) {
507  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
508  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
509  "RCP<const Map>, ptr, ind[, params]) threw an exception: "
510  << e.what ());
511  }
512  // The newly created CrsGraph _must_ have a local graph at this
513  // point. We don't really care whether CrsGraph's constructor
514  // deep-copies or shallow-copies the input, but the dimensions
515  // have to be right. That's how we tell whether the CrsGraph has
516  // a local graph.
517  auto lclGraph = graph->getLocalGraph ();
518  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
519  (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
520  lclGraph.entries.extent (0) != columnIndices.extent (0),
521  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
522  "ind[, params]) did not set the local graph correctly." << suffix);
523  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
524  (lclGraph.entries.extent (0) != values.extent (0),
525  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
526  "params]) did not set the local graph correctly. "
527  "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
528  << " != values.extent(0) = " << values.extent (0) << suffix);
529 
530  // myGraph_ not null means that the matrix owns the graph. This
531  // is true because the column indices come in as nonconst,
532  // implying shared ownership.
533  myGraph_ = graph;
534  staticGraph_ = graph;
535 
536  // The graph may not be fill complete yet. However, it is locally
537  // indexed (since we have a column Map) and has a fixed structure
538  // (due to the input arrays). This means we can allocate the
539  // (1-D) array of values and build the local matrix right now.
540  // Note that the local matrix's number of columns comes from the
541  // column Map, not the domain Map.
542 
543  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
544  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
545  numCols, values, lclGraph);
546  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
547  (lclMatrix_.values.extent (0) != values.extent (0),
548  std::logic_error, "Local matrix's constructor did not set the values "
549  "correctly. lclMatrix_.values.extent(0) = " <<
550  lclMatrix_.values.extent (0) << " != values.extent(0) = " <<
551  values.extent (0) << suffix);
552 
553  // FIXME (22 Jun 2016) I would very much like to get rid of
554  // k_values1D_ at some point. I find it confusing to have all
555  // these extra references lying around.
556  this->k_values1D_ = this->lclMatrix_.values;
557 
559  }
560 
561  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
563  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
564  const Teuchos::RCP<const map_type>& colMap,
565  const Teuchos::ArrayRCP<size_t>& ptr,
566  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
567  const Teuchos::ArrayRCP<Scalar>& val,
568  const Teuchos::RCP<Teuchos::ParameterList>& params) :
569  dist_object_type (rowMap),
570  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
571  fillComplete_ (false),
572  frobNorm_ (-STM::one ())
573  {
574  using Kokkos::Compat::getKokkosViewDeepCopy;
575  using Teuchos::av_reinterpret_cast;
576  using Teuchos::RCP;
577  typedef typename local_matrix_type::values_type values_type;
578  typedef impl_scalar_type IST;
579  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
580  "RCP<const Map>, ptr, ind, val[, params]): ";
581 
582  RCP<crs_graph_type> graph;
583  try {
584  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, ptr,
585  ind, params));
586  }
587  catch (std::exception& e) {
588  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
589  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
590  "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
591  "RCP<ParameterList>]) threw an exception: " << e.what ());
592  }
593  // myGraph_ not null means that the matrix owns the graph. This
594  // is true because the column indices come in as nonconst,
595  // implying shared ownership.
596  myGraph_ = graph;
597  staticGraph_ = graph;
598 
599  // The graph may not be fill complete yet. However, it is locally
600  // indexed (since we have a column Map) and has a fixed structure
601  // (due to the input arrays). This means we can allocate the
602  // (1-D) array of values and build the local matrix right now.
603  // Note that the local matrix's number of columns comes from the
604  // column Map, not the domain Map.
605 
606  // The graph _must_ have a local graph at this point. We don't
607  // really care whether CrsGraph's constructor deep-copies or
608  // shallow-copies the input, but the dimensions have to be right.
609  // That's how we tell whether the CrsGraph has a local graph.
610  auto lclGraph = staticGraph_->getLocalGraph ();
611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
612  (static_cast<size_t> (lclGraph.row_map.extent (0)) != static_cast<size_t> (ptr.size ()) ||
613  static_cast<size_t> (lclGraph.entries.extent (0)) != static_cast<size_t> (ind.size ()),
614  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
615  "ind[, params]) did not set the local graph correctly. Please "
616  "report this bug to the Tpetra developers.");
617 
618  const size_t numCols = staticGraph_->getColMap ()->getNodeNumElements ();
619  values_type valIn = getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
620  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
621  numCols, valIn, lclGraph);
622  // FIXME (22 Jun 2016) I would very much like to get rid of
623  // k_values1D_ at some point. I find it confusing to have all
624  // these extra references lying around.
625  this->k_values1D_ = this->lclMatrix_.values;
626 
628  }
629 
630  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
632  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
633  const Teuchos::RCP<const map_type>& colMap,
634  const local_matrix_type& lclMatrix,
635  const Teuchos::RCP<Teuchos::ParameterList>& params) :
636  dist_object_type (rowMap),
637  lclMatrix_ (lclMatrix),
638  k_values1D_ (lclMatrix.values),
639  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
640  fillComplete_ (true),
641  frobNorm_ (-STM::one ())
642  {
643  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
644  "RCP<const Map>, local_matrix_type[, RCP<ParameterList>]): ";
645  Teuchos::RCP<crs_graph_type> graph;
646  try {
647  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
648  lclMatrix.graph, params));
649  }
650  catch (std::exception& e) {
651  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
652  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
653  "RCP<const Map>, local_graph_type[, RCP<ParameterList>]) threw an "
654  "exception: " << e.what ());
655  }
656  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
657  (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
658  "<const Map>, RCP<const Map>, local_graph_type[, RCP<ParameterList>]) "
659  "did not produce a fill-complete graph. Please report this bug to the "
660  "Tpetra developers.");
661  // myGraph_ not null means that the matrix owns the graph. This
662  // is true because the column indices come in as nonconst through
663  // the matrix, implying shared ownership.
664  myGraph_ = graph;
665  staticGraph_ = graph;
666 
667  const bool callComputeGlobalConstants = params.get () == nullptr ||
668  params->get ("compute global constants", true);
669  if (callComputeGlobalConstants) {
670  this->computeGlobalConstants ();
671  }
672 
673  // Sanity checks at the end.
674 #ifdef HAVE_TPETRA_DEBUG
675  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
676  "We're at the end of fillComplete(), but isFillActive() is true. "
677  "Please report this bug to the Tpetra developers.");
678  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
679  "We're at the end of fillComplete(), but isFillComplete() is false. "
680  "Please report this bug to the Tpetra developers.");
681 #endif // HAVE_TPETRA_DEBUG
683  }
684 
685  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
687  CrsMatrix (const local_matrix_type& lclMatrix,
688  const Teuchos::RCP<const map_type>& rowMap,
689  const Teuchos::RCP<const map_type>& colMap,
690  const Teuchos::RCP<const map_type>& domainMap,
691  const Teuchos::RCP<const map_type>& rangeMap,
692  const Teuchos::RCP<Teuchos::ParameterList>& params) :
693  dist_object_type (rowMap),
694  lclMatrix_ (lclMatrix),
695  k_values1D_ (lclMatrix.values),
696  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
697  fillComplete_ (true),
698  frobNorm_ (-STM::one ())
699  {
700  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
701  "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_matrix_type[, "
702  "RCP<ParameterList>]): ";
703  Teuchos::RCP<crs_graph_type> graph;
704  try {
705  graph = Teuchos::rcp (new crs_graph_type (lclMatrix.graph, rowMap, colMap,
706  domainMap, rangeMap, params));
707  }
708  catch (std::exception& e) {
709  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
710  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
711  "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_type[, "
712  "RCP<ParameterList>]) threw an exception: " << e.what ());
713  }
714  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
715  (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
716  "<const Map>, RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_type[, "
717  "RCP<ParameterList>]) did not produce a fill-complete graph. Please report this "
718  "bug to the Tpetra developers.");
719  // myGraph_ not null means that the matrix owns the graph. This
720  // is true because the column indices come in as nonconst through
721  // the matrix, implying shared ownership.
722  myGraph_ = graph;
723  staticGraph_ = graph;
724 
725  const bool callComputeGlobalConstants = params.get () == nullptr ||
726  params->get ("compute global constants", true);
727  if (callComputeGlobalConstants) {
728  this->computeGlobalConstants ();
729  }
730 
731  // Sanity checks at the end.
732 #ifdef HAVE_TPETRA_DEBUG
733  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
734  "We're at the end of fillComplete(), but isFillActive() is true. "
735  "Please report this bug to the Tpetra developers.");
736  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
737  "We're at the end of fillComplete(), but isFillComplete() is false. "
738  "Please report this bug to the Tpetra developers.");
739 #endif // HAVE_TPETRA_DEBUG
741  }
742 
743 
744  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
746  const Teuchos::DataAccess copyOrView)
747  :CrsMatrix(source.getCrsGraph(),source.getLocalValuesView())
748  {
749  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const CrsMatrix>&, const Teuchos::DataAccess): ";
750  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!source.isFillComplete(),std::invalid_argument,"Source graph must be fillComplete().");
751 
752  if (copyOrView == Teuchos::Copy) {
753  typename local_matrix_type::values_type vals = source.getLocalValuesView();
754  typename local_matrix_type::values_type newvals;
755  Kokkos::resize(newvals,vals.extent(0));
756  Kokkos::deep_copy(newvals,vals);
757  k_values1D_ = newvals;
758  if (source.isFillComplete ()) {
759  this->fillComplete(source.getDomainMap(),source.getRangeMap());
760  }
761  }
762  else if (copyOrView == Teuchos::View) {
763  return;
764  }
765  else {
766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
767  true, std::invalid_argument, "Second argument 'copyOrView' has an "
768  "invalid value " << copyOrView << ". Valid values include "
769  "Teuchos::Copy = " << Teuchos::Copy << " and Teuchos::View = "
770  << Teuchos::View << ".");
771  }
772  }
773 
774 
775 
// Member-wise exchange of state between crs_matrix and *this.
// NOTE(review): the signature lines (class qualifier, method name, and the
// `crs_matrix` parameter) are missing from this listing; presumably this is
// CrsMatrix::swap -- confirm against the declaration.
// Each std::swap below exchanges one data member; the trailing comment on
// each line records that member's type.
776  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
777  void
780  {
781  std::swap(crs_matrix.importMV_, this->importMV_); // mutable Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
782  std::swap(crs_matrix.exportMV_, this->exportMV_); // mutable Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
783  std::swap(crs_matrix.staticGraph_, this->staticGraph_); // Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>
784  std::swap(crs_matrix.myGraph_, this->myGraph_); // Teuchos::RCP< CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>
785  std::swap(crs_matrix.lclMatrix_, this->lclMatrix_); // KokkosSparse::CrsMatrix<impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type>
786  std::swap(crs_matrix.k_values1D_, this->k_values1D_); // KokkosSparse::CrsMatrix<impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type>::values_type
787  std::swap(crs_matrix.values2D_, this->values2D_); // Teuchos::ArrayRCP<Teuchos::Array<Kokkos::Details::ArithTraits<Scalar>::val_type>>
788  std::swap(crs_matrix.storageStatus_, this->storageStatus_); // ::Tpetra::Details::EStorageStatus (enum f/m Tpetra_CrsGraph_decl.hpp)
789  std::swap(crs_matrix.fillComplete_, this->fillComplete_); // bool
790  std::swap(crs_matrix.nonlocals_, this->nonlocals_); // std::map<GO, pair<Teuchos::Array<GO>,Teuchos::Array<Scalar>>
791  std::swap(crs_matrix.frobNorm_, this->frobNorm_); // mutable Kokkos::Details::ArithTraits<impl_scalar_type>::mag_type
792  }
793 
794 
// Returns the communicator reported by this matrix's graph; pure forward to
// getCrsGraphRef().getComm().
795  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
796  Teuchos::RCP<const Teuchos::Comm<int> >
798  getComm () const {
799  return getCrsGraphRef ().getComm ();
800  }
801 
802 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated (compiled only under TPETRA_ENABLE_DEPRECATED_CODE): returns
// the Node instance reported by this matrix's graph.
803  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
804  TPETRA_DEPRECATED
805  Teuchos::RCP<Node>
807  getNode () const {
808  return getCrsGraphRef ().getNode ();
809  }
810 #endif // TPETRA_ENABLE_DEPRECATED_CODE
811 
// Returns the graph's profile type; pure forward to
// getCrsGraphRef().getProfileType(). (The return-type line is missing from
// this listing.)
812  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
815  getProfileType () const {
816  return this->getCrsGraphRef ().getProfileType ();
817  }
818 
// Returns the value of the fillComplete_ flag.
819  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
820  bool
822  isFillComplete () const {
823  return fillComplete_;
824  }
825 
// Returns the logical negation of the fillComplete_ flag, so the result is
// always the opposite of what isFillComplete() reports.
826  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
827  bool
829  isFillActive () const {
830  return ! fillComplete_;
831  }
832 
// Forwards to the graph's isStorageOptimized(). NOTE(review): the line
// naming this method is missing from the listing; presumably it is
// CrsMatrix::isStorageOptimized() const.
833  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
834  bool
837  return this->getCrsGraphRef ().isStorageOptimized ();
838  }
839 
// Forwards to the graph's isLocallyIndexed(). NOTE(review): the line naming
// this method is missing from the listing; presumably it is
// CrsMatrix::isLocallyIndexed() const.
840  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
841  bool
844  return getCrsGraphRef ().isLocallyIndexed ();
845  }
846 
// Forwards to the graph's isGloballyIndexed(). NOTE(review): the line naming
// this method is missing from the listing; presumably it is
// CrsMatrix::isGloballyIndexed() const.
847  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
848  bool
851  return getCrsGraphRef ().isGloballyIndexed ();
852  }
853 
// Reports whether the matrix's graph has a column Map; pure forward to
// getCrsGraphRef().hasColMap().
854  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
855  bool
857  hasColMap () const {
858  return getCrsGraphRef ().hasColMap ();
859  }
860 
// Forwards to the graph's getGlobalNumEntries(). NOTE(review): the
// return-type and name lines are missing from the listing; presumably this
// is CrsMatrix::getGlobalNumEntries() const.
861  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
865  return getCrsGraphRef ().getGlobalNumEntries ();
866  }
867 
// Forwards to the graph's getNodeNumEntries(). NOTE(review): the line naming
// this method is missing from the listing; presumably it is
// CrsMatrix::getNodeNumEntries() const.
868  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
869  size_t
872  return getCrsGraphRef ().getNodeNumEntries ();
873  }
874 
// Forwards to the graph's getGlobalNumRows(). NOTE(review): the return-type
// and name lines are missing from the listing; presumably this is
// CrsMatrix::getGlobalNumRows() const.
875  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
879  return getCrsGraphRef ().getGlobalNumRows ();
880  }
881 
// Forwards to the graph's getGlobalNumCols(). NOTE(review): the return-type
// and name lines are missing from the listing; presumably this is
// CrsMatrix::getGlobalNumCols() const.
882  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
886  return getCrsGraphRef ().getGlobalNumCols ();
887  }
888 
// Returns the row count reported by the graph's getNodeNumRows().
889  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
890  size_t
892  getNodeNumRows () const {
893  return getCrsGraphRef ().getNodeNumRows ();
894  }
895 
// Returns the column count reported by the graph's getNodeNumCols().
896  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
897  size_t
899  getNodeNumCols () const {
900  return getCrsGraphRef ().getNodeNumCols ();
901  }
902 
903 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated public wrapper (compiled only under
// TPETRA_ENABLE_DEPRECATED_CODE): returns getGlobalNumDiagsImpl().
904  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
905  global_size_t TPETRA_DEPRECATED
907  getGlobalNumDiags () const {
908  return this->getGlobalNumDiagsImpl ();
909  }
910 
// Deprecated public wrapper (compiled only under
// TPETRA_ENABLE_DEPRECATED_CODE): returns getNodeNumDiagsImpl().
911  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
912  size_t TPETRA_DEPRECATED
913  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
914  getNodeNumDiags () const {
915  return this->getNodeNumDiagsImpl ();
916  }
917 
// Implementation behind getGlobalNumDiags(): views the matrix's graph as the
// HasDeprecatedMethods2630_WarningThisClassIsNotForUsers interface and
// returns that interface's getGlobalNumDiagsImpl(). (The return-type line is
// missing from this listing.)
918  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
920  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
921  getGlobalNumDiagsImpl () const {
922  const crs_graph_type& G = this->getCrsGraphRef ();
923  using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
924  return dynamic_cast<const HDM&> (G).getGlobalNumDiagsImpl (); // reference cast: throws std::bad_cast if G is not an HDM
925  }
926 
// Implementation behind getNodeNumDiags(): views the matrix's graph as the
// HasDeprecatedMethods2630_WarningThisClassIsNotForUsers interface and
// returns that interface's getNodeNumDiagsImpl().
927  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
928  size_t
929  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
930  getNodeNumDiagsImpl () const {
931  const crs_graph_type& G = this->getCrsGraphRef ();
932  using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
933  return dynamic_cast<const HDM&> (G).getNodeNumDiagsImpl (); // reference cast: throws std::bad_cast if G is not an HDM
934  }
935 #endif // TPETRA_ENABLE_DEPRECATED_CODE
936 
// Returns whatever the graph reports for the given global row index; pure
// forward to getCrsGraphRef().getNumEntriesInGlobalRow(globalRow).
937  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
938  size_t
940  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
941  return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
942  }
943 
// Returns whatever the graph reports for the given local row index; pure
// forward to getCrsGraphRef().getNumEntriesInLocalRow(localRow).
944  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
945  size_t
947  getNumEntriesInLocalRow (LocalOrdinal localRow) const {
948  return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
949  }
950 
// Forwards to the graph's getGlobalMaxNumRowEntries(). NOTE(review): the
// line naming this method is missing from the listing; presumably it is
// CrsMatrix::getGlobalMaxNumRowEntries() const.
951  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
952  size_t
955  return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
956  }
957 
// Forwards to the graph's getNodeMaxNumRowEntries(). NOTE(review): the line
// naming this method is missing from the listing; presumably it is
// CrsMatrix::getNodeMaxNumRowEntries() const.
958  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
959  size_t
962  return getCrsGraphRef ().getNodeMaxNumRowEntries ();
963  }
964 
// Returns the index base of the row Map (getRowMap()->getIndexBase()).
965  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
966  GlobalOrdinal
968  getIndexBase () const {
969  return getRowMap ()->getIndexBase ();
970  }
971 
// Returns the row Map held by the matrix's graph.
972  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
973  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
975  getRowMap () const {
976  return getCrsGraphRef ().getRowMap ();
977  }
978 
// Returns the column Map held by the matrix's graph.
979  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
980  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
982  getColMap () const {
983  return getCrsGraphRef ().getColMap ();
984  }
985 
// Returns the domain Map held by the matrix's graph.
986  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
987  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
989  getDomainMap () const {
990  return getCrsGraphRef ().getDomainMap ();
991  }
992 
// Returns the range Map held by the matrix's graph.
993  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
994  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
996  getRangeMap () const {
997  return getCrsGraphRef ().getRangeMap ();
998  }
999 
// Returns the matrix's graph as an RCP<const RowGraph>: staticGraph_ when it
// is non-null, otherwise myGraph_ (which may itself be null).
1000  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1001  Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
1003  getGraph () const {
1004  if (staticGraph_ != Teuchos::null) {
1005  return staticGraph_;
1006  }
1007  return myGraph_;
1008  }
1009 
// Returns the matrix's graph as an RCP<const CrsGraph>: staticGraph_ when it
// is non-null, otherwise myGraph_ (which may itself be null).
1010  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1011  Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
1013  getCrsGraph () const {
1014  if (staticGraph_ != Teuchos::null) {
1015  return staticGraph_;
1016  }
1017  return myGraph_;
1018  }
1019 
// Returns a reference to the matrix's graph: *staticGraph_ when that pointer
// is non-null, otherwise *myGraph_. (The return-type lines are missing from
// this listing.)
//
// Only builds with HAVE_TPETRA_DEBUG verify that myGraph_ is non-null before
// dereferencing it (raising std::logic_error if it is null); other builds
// dereference it unchecked.
1020  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1023  getCrsGraphRef () const {
1024  if (! this->staticGraph_.is_null ()) {
1025  return * (this->staticGraph_);
1026  }
1027  else {
1028 #ifdef HAVE_TPETRA_DEBUG
1029  const char tfecfFuncName[] = "getCrsGraphRef: ";
1030  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1031  (this->myGraph_.is_null (), std::logic_error,
1032  "Both staticGraph_ and myGraph_ are null. "
1033  "Please report this bug to the Tpetra developers.");
1034 #endif // HAVE_TPETRA_DEBUG
1035  return * (this->myGraph_);
1036  }
1037  }
1038 
1039 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated public wrapper (compiled only under
// TPETRA_ENABLE_DEPRECATED_CODE): returns isLowerTriangularImpl().
1040  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1041  bool TPETRA_DEPRECATED
1042  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1043  isLowerTriangular () const {
1044  return this->isLowerTriangularImpl ();
1045  }
1046 
// Deprecated public wrapper (compiled only under
// TPETRA_ENABLE_DEPRECATED_CODE): returns isUpperTriangularImpl().
1047  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1048  bool TPETRA_DEPRECATED
1049  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1050  isUpperTriangular () const {
1051  return this->isUpperTriangularImpl ();
1052  }
1053 
// Implementation behind isLowerTriangular(): views the matrix's graph as the
// HasDeprecatedMethods2630_WarningThisClassIsNotForUsers interface and
// returns that interface's isLowerTriangularImpl().
1054  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1055  bool
1056  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1057  isLowerTriangularImpl () const {
1058  const crs_graph_type& G = this->getCrsGraphRef ();
1059  using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
1060  return dynamic_cast<const HDM&> (G).isLowerTriangularImpl (); // reference cast: throws std::bad_cast if G is not an HDM
1061  }
1062 
// Implementation behind isUpperTriangular(): views the matrix's graph as the
// HasDeprecatedMethods2630_WarningThisClassIsNotForUsers interface and
// returns that interface's isUpperTriangularImpl().
1063  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1064  bool
1065  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1066  isUpperTriangularImpl () const {
1067  const crs_graph_type& G = this->getCrsGraphRef ();
1068  using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
1069  return dynamic_cast<const HDM&> (G).isUpperTriangularImpl (); // reference cast: throws std::bad_cast if G is not an HDM
1070  }
1071 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1072 
// Returns true exactly when myGraph_ (the nonconst graph pointer) is null.
1073  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1074  bool
1076  isStaticGraph () const {
1077  return myGraph_.is_null ();
1078  }
1079 
// Capability query that unconditionally returns true.
// NOTE(review): the line naming this method is missing from the listing;
// presumably it is CrsMatrix::hasTransposeApply() const -- confirm against
// the declaration.
1080  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1081  bool
1084  return true;
1085  }
1086 
// Capability query that unconditionally returns true.
// NOTE(review): the line naming this method is missing from the listing;
// presumably it is CrsMatrix::supportsRowViews() const -- confirm against
// the declaration.
1087  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1088  bool
1091  return true;
1092  }
1093 
// allocateValues2D (name taken from tfecfFuncName below; the signature lines
// are missing from this listing).
//
// Builds and returns the "2-D" value storage: an ArrayRCP with one
// Teuchos::Array per local row. Each row's array is resized to match the
// length of the corresponding row in the graph's 2-D index storage,
// preferring lclInds2D_ and falling back to gblInds2D_. If both are null the
// per-row arrays are returned as constructed, without any resize.
//
// Raises std::runtime_error if the graph's indices are not allocated, or if
// the graph's profile type is StaticProfile.
1094  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1095  Teuchos::ArrayRCP<Teuchos::Array<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type> >
1098  {
1099  using Teuchos::arcp;
1100  using Teuchos::Array;
1101  using Teuchos::ArrayRCP;
1102  typedef impl_scalar_type IST;
1103  typedef LocalOrdinal LO;
1104  const char tfecfFuncName[] = "allocateValues2D: ";
1105 
1106  const crs_graph_type& graph = this->getCrsGraphRef ();
1107  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1108  (! graph.indicesAreAllocated (), std::runtime_error,
1109  "Graph indices must be allocated before values.");
1110  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1111  (graph.getProfileType () == StaticProfile, std::runtime_error,
1112  "Graph indices must be allocated in a dynamic profile.");
1113 
1114  const LO lclNumRows = graph.getNodeNumRows (); // the row count is converted to LocalOrdinal here
1115  Teuchos::ArrayRCP<Teuchos::Array<IST> > values2D (lclNumRows);
1116  if (! graph.lclInds2D_.is_null ()) {
1117  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1118  values2D[lclRow].resize (graph.lclInds2D_[lclRow].size ());
1119  }
1120  }
1121  else if (! graph.gblInds2D_.is_null ()) {
1122  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1123  values2D[lclRow].resize (graph.gblInds2D_[lclRow].size ());
1124  }
1125  }
1126  return values2D;
1127  }
1128 
// Allocates storage for the matrix's values, first allocating the graph's
// indices when the caller says they have not been allocated yet.
//
// Parameters:
//   lg  - forwarded to myGraph_->allocateIndices() when
//         gas == GraphNotYetAllocated; otherwise unused.
//   gas - the caller's assertion about whether the graph's indices are
//         already allocated.
//
// Effects:
//   - StaticProfile: k_values1D_ becomes a new 1-D View whose length is the
//     last entry of the static graph's row offsets (k_rowPtrs_).
//   - otherwise: values2D_ is set to the result of allocateValues2D().
//
// Raises:
//   - std::runtime_error if allocateIndices() throws anything (the original
//     exception is caught and replaced).
//   - std::logic_error if, under StaticProfile, the row offsets array does
//     not have lclNumRows+1 entries.
//   - std::logic_error from additional consistency checks that are compiled
//     only when HAVE_TPETRA_DEBUG is defined.
1129  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1130  void
1132  allocateValues (ELocalGlobal lg, GraphAllocationStatus gas)
1133  {
1134  using ::Tpetra::Details::ProfilingRegion;
1135  const char tfecfFuncName[] = "allocateValues: ";
1136  ProfilingRegion regionAllocateValues ("Tpetra::CrsMatrix::allocateValues");
1137 
1138 #ifdef HAVE_TPETRA_DEBUG
1139  const char suffix[] = " Please report this bug to the Tpetra developers.";
1140 
1141  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1142  (this->staticGraph_.is_null (), std::logic_error,
1143  "staticGraph_ is null." << suffix);
1144 
1145  // If the graph indices are already allocated, then gas should be
1146  // GraphAlreadyAllocated. Otherwise, gas should be
1147  // GraphNotYetAllocated.
1148  if ((gas == GraphAlreadyAllocated) != this->staticGraph_->indicesAreAllocated ()) {
1149  const char err1[] = "The caller has asserted that the graph is ";
1150  const char err2[] = "already allocated, but the static graph says "
1151  "that its indices are ";
1152  const char err3[] = "already allocated. Please report this bug to "
1153  "the Tpetra developers.";
1154  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1155  (gas == GraphAlreadyAllocated && ! this->staticGraph_->indicesAreAllocated (),
1156  std::logic_error, err1 << err2 << "not " << err3);
1157  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1158  (gas != GraphAlreadyAllocated && this->staticGraph_->indicesAreAllocated (),
1159  std::logic_error, err1 << "not " << err2 << err3);
1160  }
1161 
1162  // If the graph is unallocated, then it had better be a
1163  // matrix-owned graph. ("Matrix-owned graph" means that the
1164  // matrix gets to define the graph structure. If the CrsMatrix
1165  // constructor that takes an RCP<const CrsGraph> was used, then
1166  // the matrix does _not_ own the graph.)
1167  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1168  (! this->staticGraph_->indicesAreAllocated () &&
1169  this->myGraph_.is_null (), std::logic_error,
1170  "The static graph says that its indices are not allocated, "
1171  "but the graph is not owned by the matrix." << suffix);
1172 #endif // HAVE_TPETRA_DEBUG
1173 
1174  if (gas == GraphNotYetAllocated) {
1175 #ifdef HAVE_TPETRA_DEBUG
1176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177  (this->myGraph_.is_null (), std::logic_error,
1178  "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1179 #endif // HAVE_TPETRA_DEBUG
1180  try {
1181  this->myGraph_->allocateIndices (lg);
1182  }
1183  catch (std::exception& e) {
1184  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1185  (true, std::runtime_error, "CrsGraph::allocateIndices "
1186  "threw an exception: " << e.what ());
1187  }
1188  catch (...) {
1189  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1190  (true, std::runtime_error, "CrsGraph::allocateIndices "
1191  "threw an exception not a subclass of std::exception.");
1192  }
1193  }
1194 
1195  // Allocate matrix values.
1196  if (this->getProfileType () == StaticProfile) {
1197  // "Static profile" means that the number of matrix entries in
1198  // each row was fixed at the time the CrsMatrix constructor was
1199  // called. This lets us use 1-D storage for the matrix's
1200  // values. ("1-D storage" means the same as that used by the
1201  // three arrays in the compressed sparse row storage format.)
1202 
1203 #ifdef HAVE_TPETRA_DEBUG
1204  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1205  (this->staticGraph_.is_null (), std::logic_error,
1206  "this->getProfileType() == StaticProfile, but staticGraph_ is null."
1207  << suffix);
1208 #endif // HAVE_TPETRA_DEBUG
1209 
1210  const size_t lclNumRows = this->staticGraph_->getNodeNumRows ();
1211  typename Graph::local_graph_type::row_map_type k_ptrs =
1212  this->staticGraph_->k_rowPtrs_;
1213  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1214  (k_ptrs.extent (0) != lclNumRows+1, std::logic_error,
1215  "With StaticProfile, row offsets array has length "
1216  << k_ptrs.extent (0) << " != (lclNumRows+1) = "
1217  << (lclNumRows+1) << ".");
1218 
1219  const size_t lclTotalNumEntries = // last row offset = length of the values array allocated below
1220  ::Tpetra::Details::getEntryOnHost (k_ptrs, lclNumRows);
1221 
1222  // Allocate array of (packed???) matrix values.
1223  typedef typename local_matrix_type::values_type values_type;
1224  this->k_values1D_ =
1225  values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1226  }
1227  else {
1228  // "Dynamic profile" means the number of matrix entries in each
1229  // row is not fixed and may expand. Thus, we store the matrix's
1230  // values in "2-D storage," meaning an array of arrays. The
1231  // outer array has as many inner arrays as there are rows in the
1232  // matrix, and each inner array stores the values in that row.
1233  this->values2D_ = this->allocateValues2D ();
1234  }
1235  }
1236 
// Hands back the matrix's three 1-D storage arrays through the output
// arguments:
//   rowPointers   <- the graph's getNodeRowPtrs()
//   columnIndices <- the graph's getNodePackedIndices()
//   values        <- a persisting view of k_values1D_, reinterpreted from
//                    impl_scalar_type to Scalar
//
// Raises std::runtime_error if:
//   - columnIndices and values, as passed in, differ in size (the check runs
//     on the incoming arguments, before they are overwritten);
//   - getCrsGraph() returns null;
//   - either graph accessor throws a std::exception (the message wraps
//     e.what()). Exceptions of other types are not caught here.
1237  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1238  void
1240  getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
1241  Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
1242  Teuchos::ArrayRCP<const Scalar>& values) const
1243  {
1244  using Teuchos::RCP;
1245  const char tfecfFuncName[] = "getAllValues: ";
1246  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1247  columnIndices.size () != values.size (), std::runtime_error,
1248  "Requires that columnIndices and values are the same size.");
1249 
1250  RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
1251  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1252  relevantGraph.is_null (), std::runtime_error,
1253  "Requires that getCrsGraph() is not null.");
1254  try {
1255  rowPointers = relevantGraph->getNodeRowPtrs ();
1256  }
1257  catch (std::exception &e) {
1258  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1259  true, std::runtime_error,
1260  "Caught exception while calling graph->getNodeRowPtrs(): "
1261  << e.what ());
1262  }
1263  try {
1264  columnIndices = relevantGraph->getNodePackedIndices ();
1265  }
1266  catch (std::exception &e) {
1267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1268  true, std::runtime_error,
1269  "Caught exception while calling graph->getNodePackedIndices(): "
1270  << e.what ());
1271  }
1272  Teuchos::ArrayRCP<const impl_scalar_type> vals =
1273  Kokkos::Compat::persistingView (k_values1D_);
1274  values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
1275  }
1276 
1277  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1278  void
1280  fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1281  {
1283  using ::Tpetra::Details::ProfilingRegion;
1284  using Kokkos::create_mirror_view;
1285  using Teuchos::arcp_const_cast;
1286  using Teuchos::Array;
1287  using Teuchos::ArrayRCP;
1288  using Teuchos::null;
1289  using Teuchos::RCP;
1290  using Teuchos::rcp;
1291  typedef typename local_matrix_type::row_map_type row_map_type;
1292  typedef typename Graph::local_graph_type::entries_type::non_const_type lclinds_1d_type;
1293  typedef typename local_matrix_type::values_type values_type;
1294  ProfilingRegion regionFLGAM ("Tpetra::CrsGraph::fillLocalGraphAndMatrix");
1295 
1296 #ifdef HAVE_TPETRA_DEBUG
1297  const char tfecfFuncName[] = "fillLocalGraphAndMatrix (called from "
1298  "fillComplete or expertStaticFillComplete): ";
1299 #endif // HAVE_TPETRA_DEBUG
1300 
1301 #ifdef HAVE_TPETRA_DEBUG
1302  // fillComplete() only calls fillLocalGraphAndMatrix() if the
1303  // matrix owns the graph, which means myGraph_ is not null.
1304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1305  (myGraph_.is_null (), std::logic_error, "The nonconst graph (myGraph_) "
1306  "is null. This means that the matrix has a const (a.k.a. \"static\") "
1307  "graph. fillComplete or expertStaticFillComplete should never call "
1308  "fillLocalGraphAndMatrix in that case. "
1309  "Please report this bug to the Tpetra developers.");
1310 #endif // HAVE_TPETRA_DEBUG
1311 
1312  const size_t lclNumRows = this->getNodeNumRows ();
1313 
1314  // This method's goal is to fill in the three arrays (compressed
1315  // sparse row format) that define the sparse graph's and matrix's
1316  // structure, and the sparse matrix's values.
1317  //
1318  // Use the nonconst version of row_map_type for k_ptrs,
1319  // because row_map_type is const and we need to modify k_ptrs here.
1320  typename row_map_type::non_const_type k_ptrs;
1321  row_map_type k_ptrs_const;
1322  lclinds_1d_type k_inds;
1323  values_type k_vals;
1324 
1325  // Get references to the data in myGraph_, so we can modify them
1326  // as well. Note that we only call fillLocalGraphAndMatrix() if
1327  // the matrix owns the graph, which means myGraph_ is not null.
1328  lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_;
1329 
1330  typedef decltype (myGraph_->k_numRowEntries_) row_entries_type;
1331 
1332  if (getProfileType () != StaticProfile) {
1333  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1334  //
1335  // DynamicProfile means that the matrix's column indices and
1336  // values are currently stored in a 2-D "unpacked" format, in
1337  // the arrays-of-arrays myGraph_->lclInds2D_ (for column
1338  // indices) and values2D_ (for values). We allocate 1-D storage
1339  // (k_inds resp. k_vals), and then copy from 2-D storage
1340  // (lclInds2D_ resp. values2D_) into 1-D storage (k_inds
1341  // resp. k_vals).
1342 
1343  // We'll be packing on host. k_numRowEntries_ lives on host,
1344  // and computeOffsetsFromCounts accepts a host View for counts,
1345  // even if offsets is a device View. (Furthermore, the "host"
1346  // View may very well live in CudaUVMSpace, so doing this has no
1347  // penalty, other than requiring synchronization between Cuda
1348  // and host. UVM memory gets grumpy if both device and host
1349  // attempt to access it at the same time without an intervening
1350  // fence.)
1351  typename row_entries_type::const_type numRowEnt_h =
1352  myGraph_->k_numRowEntries_;
1353 #ifdef HAVE_TPETRA_DEBUG
1354  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1355  (static_cast<size_t> (numRowEnt_h.extent (0)) != lclNumRows,
1356  std::logic_error, "(DynamicProfile branch) numRowEnt_h has the "
1357  "wrong length. numRowEnt_h.extent(0) = "
1358  << numRowEnt_h.extent (0) << " != getNodeNumRows() = "
1359  << lclNumRows << ".");
1360 #endif // HAVE_TPETRA_DEBUG
1361 
1362  // We're packing on host (since we can't read Teuchos data
1363  // structures on device), so let's fill the packed row offsets
1364  // on host first.
1365  k_ptrs = typename row_map_type::non_const_type ("Tpetra::CrsGraph::ptr",
1366  lclNumRows+1);
1367  typename row_map_type::non_const_type::HostMirror h_ptrs =
1368  create_mirror_view (k_ptrs);
1369 
1370  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1371  // the array of valid entry counts per row.
1372  //
1373  // Return value is the total number of entries in the matrix on
1374  // the calling process. It's cheap to compute and useful as a
1375  // sanity check.
1376  const size_t lclTotalNumEntries =
1377  computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
1378 #ifdef HAVE_TPETRA_DEBUG
1379  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1380  (static_cast<size_t> (h_ptrs.extent (0)) != lclNumRows + 1,
1381  std::logic_error, "(DynamicProfile branch) After packing h_ptrs, "
1382  "h_ptrs.extent(0) = " << h_ptrs.extent (0) << " != "
1383  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1384  {
1385  const size_t h_ptrs_lastEnt = h_ptrs(lclNumRows); // it's a host View
1386  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1387  (h_ptrs_lastEnt != lclTotalNumEntries, std::logic_error,
1388  "(DynamicProfile branch) After packing h_ptrs, h_ptrs(lclNumRows="
1389  << lclNumRows << ") = " << h_ptrs_lastEnt << " != total number "
1390  "of entries on the calling process = " << lclTotalNumEntries << ".");
1391  }
1392 #endif // HAVE_TPETRA_DEBUG
1393 
1394  // Allocate the arrays of packed column indices and values.
1395  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1396  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1397 
1398  // We need host views of the above, since 2-D storage lives on host.
1399  typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds);
1400  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1401 
1402  // Pack the column indices and values on the host.
1403  ArrayRCP<Array<LocalOrdinal> > lclInds2D = myGraph_->lclInds2D_;
1404  for (size_t row = 0; row < lclNumRows; ++row) {
1405  const size_t numEnt = numRowEnt_h(row);
1406  std::copy (lclInds2D[row].begin(),
1407  lclInds2D[row].begin() + numEnt,
1408  h_inds.data() + h_ptrs(row));
1409  std::copy (values2D_[row].begin(),
1410  values2D_[row].begin() + numEnt,
1411  h_vals.data() + h_ptrs(row));
1412  }
1413 
1414  // Copy the packed column indices and values to the device.
1415  Kokkos::deep_copy (k_inds, h_inds);
1416  Kokkos::deep_copy (k_vals, h_vals);
1417  // Copy the packed row offsets to the device too.
1418  // We didn't actually need them on device before.
1419  Kokkos::deep_copy (k_ptrs, h_ptrs);
1420  k_ptrs_const = k_ptrs; // const version of k_ptrs
1421 
1422 #ifdef HAVE_TPETRA_DEBUG
1423  // Sanity check of packed row offsets.
1424  if (k_ptrs.extent (0) != 0) {
1425  const size_t numOffsets = static_cast<size_t> (k_ptrs.extent (0));
1426  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1427  (numOffsets != lclNumRows + 1, std::logic_error, "(DynamicProfile "
1428  "branch) After copying into k_ptrs, k_ptrs.extent(0) = " <<
1429  numOffsets << " != (lclNumRows+1) = " << (lclNumRows+1) << ".");
1430 
1431  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, numOffsets-1);
1432  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1433  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1434  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1435  << (numOffsets-1) << ") = " << valToCheck << " != "
1436  "k_vals.extent(0) = " << k_vals.extent (0) << ".");
1437  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1438  (static_cast<size_t> (valToCheck) != k_inds.extent (0),
1439  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1440  << (numOffsets-1) << ") = " << valToCheck << " != "
1441  "k_inds.extent(0) = " << k_inds.extent (0) << ".");
1442  }
1443 #endif // HAVE_TPETRA_DEBUG
1444  }
1445  else if (getProfileType () == StaticProfile) {
1446  // StaticProfile means that the matrix's column indices and
1447  // values are currently stored in a 1-D format, with row offsets
1448  // in k_rowPtrs_ and local column indices in k_lclInds1D_.
1449 
1450  // StaticProfile also means that the graph's array of row
1451  // offsets must already be allocated.
1452  typename Graph::local_graph_type::row_map_type curRowOffsets =
1453  myGraph_->k_rowPtrs_;
1454 
1455 #ifdef HAVE_TPETRA_DEBUG
1456  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1457  (curRowOffsets.extent (0) == 0, std::logic_error,
1458  "(StaticProfile branch) curRowOffsets.extent(0) == 0.");
1459  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1460  (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1461  "(StaticProfile branch) curRowOffsets.extent(0) = "
1462  << curRowOffsets.extent (0) << " != lclNumRows + 1 = "
1463  << (lclNumRows + 1) << ".")
1464  {
1465  const size_t numOffsets = curRowOffsets.extent (0);
1466  const auto valToCheck =
1467  ::Tpetra::Details::getEntryOnHost (curRowOffsets, numOffsets - 1);
1468  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1469  (numOffsets != 0 &&
1470  myGraph_->k_lclInds1D_.extent (0) != valToCheck,
1471  std::logic_error, "(StaticProfile branch) numOffsets = " <<
1472  numOffsets << " != 0 and myGraph_->k_lclInds1D_.extent(0) = "
1473  << myGraph_->k_lclInds1D_.extent (0) << " != curRowOffsets("
1474  << numOffsets << ") = " << valToCheck << ".");
1475  }
1476 #endif // HAVE_TPETRA_DEBUG
1477 
1478  if (myGraph_->getNodeNumEntries () != myGraph_->getNodeAllocationSize ()) {
1479  // The matrix's current 1-D storage is "unpacked." This means
1480  // the row offsets may differ from what the final row offsets
1481  // should be. This could happen, for example, if the user
1482  // specified StaticProfile in the constructor and set an upper
1483  // bound on the number of entries per row, but didn't fill all
1484  // those entries.
1485 #ifdef HAVE_TPETRA_DEBUG
1486  if (curRowOffsets.extent (0) != 0) {
1487  const size_t numOffsets =
1488  static_cast<size_t> (curRowOffsets.extent (0));
1489  const auto valToCheck =
1490  ::Tpetra::Details::getEntryOnHost (curRowOffsets, numOffsets-1);
1491  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1492  (static_cast<size_t> (valToCheck) !=
1493  static_cast<size_t> (k_values1D_.extent (0)),
1494  std::logic_error, "(StaticProfile unpacked branch) Before "
1495  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1496  << valToCheck << " != k_values1D_.extent(0)"
1497  " = " << k_values1D_.extent (0) << ".");
1498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1499  (static_cast<size_t> (valToCheck) !=
1500  static_cast<size_t> (myGraph_->k_lclInds1D_.extent (0)),
1501  std::logic_error, "(StaticProfile unpacked branch) Before "
1502  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1503  << valToCheck
1504  << " != myGraph_->k_lclInds1D_.extent(0) = "
1505  << myGraph_->k_lclInds1D_.extent (0) << ".");
1506  }
1507 #endif // HAVE_TPETRA_DEBUG
1508 
1509  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1510  // the array of valid entry counts per row.
1511 
1512  // Total number of entries in the matrix on the calling
1513  // process. We will compute this in the loop below. It's
1514  // cheap to compute and useful as a sanity check.
1515  size_t lclTotalNumEntries = 0;
1516  // This will be a host view of packed row offsets.
1517  typename row_map_type::non_const_type::HostMirror h_ptrs;
1518  {
1519  // Allocate the packed row offsets array in a nonconst
1520  // temporary (packedRowOffsets) that computeOffsetsFromCounts
1521  // can fill. We will assign packedRowOffsets to k_ptrs below.
1522  typename row_map_type::non_const_type
1523  packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
1524  typename row_entries_type::const_type numRowEnt_h =
1525  myGraph_->k_numRowEntries_;
1526  // We're computing offsets on device. This function can
1527  // handle numRowEnt_h being a host View.
1528  lclTotalNumEntries =
1529  computeOffsetsFromCounts (packedRowOffsets, numRowEnt_h);
1530  // Hand the computed offsets to k_ptrs, and keep a const
1531  // view of the same offsets in k_ptrs_const.
1532  k_ptrs = packedRowOffsets;
1533  k_ptrs_const = k_ptrs;
1534  }
1535 
1536 #ifdef HAVE_TPETRA_DEBUG
1537  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1538  (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1539  std::logic_error,
1540  "(StaticProfile unpacked branch) After packing k_ptrs, "
1541  "k_ptrs.extent(0) = " << k_ptrs.extent (0) << " != "
1542  "lclNumRows+1 = " << (lclNumRows+1) << ".");
1543  {
1544  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, lclNumRows);
1545  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1546  (valToCheck != lclTotalNumEntries, std::logic_error,
1547  "(StaticProfile unpacked branch) After filling k_ptrs, "
1548  "k_ptrs(lclNumRows=" << lclNumRows << ") = " << valToCheck
1549  << " != total number of entries on the calling process = "
1550  << lclTotalNumEntries << ".");
1551  }
1552 #endif // HAVE_TPETRA_DEBUG
1553 
1554  // Allocate the arrays of packed column indices and values.
1555  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1556  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1557 
1558  // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_,
1559  // and k_values1D_ are currently unpacked. Pack them, using
1560  // the packed row offsets array k_ptrs that we created above.
1561  //
1562  // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1563  // need to keep around the unpacked row offsets, column
1564  // indices, and values arrays.
1565 
1566  // Pack the column indices from unpacked k_lclInds1D_ into
1567  // packed k_inds. We will replace k_lclInds1D_ below.
1568  typedef pack_functor<typename Graph::local_graph_type::entries_type::non_const_type,
1569  typename Graph::local_graph_type::row_map_type>
1570  inds_packer_type;
1571  inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_,
1572  k_ptrs, curRowOffsets);
1573  typedef typename decltype (k_inds)::execution_space exec_space;
1574  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
1575  Kokkos::parallel_for (range_type (0, lclNumRows), indsPacker);
1576 
1577  // Pack the values from unpacked k_values1D_ into packed
1578  // k_vals. We will replace k_values1D_ below.
1579  typedef pack_functor<values_type, row_map_type> vals_packer_type;
1580  vals_packer_type valsPacker (k_vals, this->k_values1D_,
1581  k_ptrs, curRowOffsets);
1582  Kokkos::parallel_for (range_type (0, lclNumRows), valsPacker);
1583 
1584 #ifdef HAVE_TPETRA_DEBUG
1585  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1586  (k_ptrs.extent (0) == 0, std::logic_error,
1587  "(StaticProfile \"Optimize Storage\" = "
1588  "true branch) After packing, k_ptrs.extent(0) = 0. This "
1589  "probably means that k_rowPtrs_ was never allocated.");
1590  if (k_ptrs.extent (0) != 0) {
1591  const size_t numOffsets = static_cast<size_t> (k_ptrs.extent (0));
1592  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, numOffsets - 1);
1593  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1594  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1595  std::logic_error,
1596  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1597  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1598  " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1599  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1600  (static_cast<size_t> (valToCheck) != k_inds.extent (0),
1601  std::logic_error,
1602  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1603  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1604  " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1605  }
1606 #endif // HAVE_TPETRA_DEBUG
1607  }
1608  else { // We don't have to pack, so just set the pointers.
1609  k_ptrs_const = myGraph_->k_rowPtrs_;
1610  k_inds = myGraph_->k_lclInds1D_;
1611  k_vals = this->k_values1D_;
1612 
1613 #ifdef HAVE_TPETRA_DEBUG
1614  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1615  (k_ptrs_const.extent (0) == 0, std::logic_error,
1616  "(StaticProfile \"Optimize Storage\"=false branch) "
1617  "k_ptrs_const.extent(0) = 0. This probably means that "
1618  "k_rowPtrs_ was never allocated.");
1619  if (k_ptrs_const.extent (0) != 0) {
1620  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.extent (0));
1621  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs_const, numOffsets - 1);
1622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1623  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1624  std::logic_error,
1625  "(StaticProfile \"Optimize Storage\"=false branch) "
1626  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1627  << " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1629  (static_cast<size_t> (valToCheck) != k_inds.extent (0),
1630  std::logic_error,
1631  "(StaticProfile \"Optimize Storage\" = false branch) "
1632  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1633  << " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1634  }
1635 #endif // HAVE_TPETRA_DEBUG
1636  }
1637  }
1638 
1639 #ifdef HAVE_TPETRA_DEBUG
1640  // Extra sanity checks.
1641  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1642  (static_cast<size_t> (k_ptrs_const.extent (0)) != lclNumRows + 1,
1643  std::logic_error, "After packing, k_ptrs_const.extent(0) = " <<
1644  k_ptrs_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
1645  << ".");
1646  if (k_ptrs_const.extent (0) != 0) {
1647  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.extent (0));
1648  const size_t k_ptrs_const_numOffsetsMinus1 =
1649  ::Tpetra::Details::getEntryOnHost (k_ptrs_const, numOffsets - 1);
1650  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1651  (k_ptrs_const_numOffsetsMinus1 != k_vals.extent (0),
1652  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1653  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_vals.extent(0)"
1654  " = " << k_vals.extent (0) << ".");
1655  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1656  (k_ptrs_const_numOffsetsMinus1 != k_inds.extent (0),
1657  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1658  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_inds.extent(0)"
1659  " = " << k_inds.extent (0) << ".");
1660  }
1661 #endif // HAVE_TPETRA_DEBUG
1662 
1663  // May we ditch the old allocations for the packed (and otherwise
1664  // "optimized") allocations, later in this routine? Optimize
1665  // storage if the graph is not static, or if the graph already has
1666  // optimized storage.
1667  const bool defaultOptStorage =
1668  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1669  const bool requestOptimizedStorage =
1670  (! params.is_null () && params->get ("Optimize Storage", defaultOptStorage)) ||
1671  (params.is_null () && defaultOptStorage);
1672 
1673  // The graph has optimized storage when indices are allocated,
1674  // myGraph_->k_numRowEntries_ is empty, and there are more than
1675  // zero rows on this process. It's impossible for the graph to
1676  // have dynamic profile (getProfileType() == DynamicProfile) and
1677  // be optimized (isStorageOptimized()).
1678  if (requestOptimizedStorage) {
1679  // Free the old, unpacked, unoptimized allocations.
1680  // Change the graph from dynamic to static allocation profile
1681 
1682  // Free graph data structures that are only needed for 2-D or
1683  // unpacked 1-D storage.
1684  myGraph_->lclInds2D_ = null; // legacy KokkosClassic 2-D storage
1685  myGraph_->k_numRowEntries_ = row_entries_type ();
1686 
1687  // Free the matrix's 2-D storage.
1688  this->values2D_ = null;
1689 
1690  // Keep the new 1-D packed allocations.
1691  myGraph_->k_rowPtrs_ = k_ptrs_const;
1692  myGraph_->k_lclInds1D_ = k_inds;
1693  this->k_values1D_ = k_vals;
1694 
1695  // Whatever graph was before, it's StaticProfile now.
1696  myGraph_->pftype_ = StaticProfile;
1697  myGraph_->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
1698  this->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
1699  }
1700 
1701  // Make the local graph, using the arrays of row offsets and
1702  // column indices that we built above. The local graph should be
1703  // null, but we delete it first so that any memory can be freed
1704  // before we allocate the new one.
1705  //
1706  // FIXME (mfh 06,28 Aug 2014) It would make more sense for
1707  // Tpetra::CrsGraph to have a protected method that accepts k_inds
1708  // and k_ptrs, and creates the local graph lclGraph_.
1709  myGraph_->lclGraph_ =
1710  typename Graph::local_graph_type (k_inds, k_ptrs_const);
1711 
1712  // Make the local matrix, using the local graph and vals array.
1713  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1714  getNodeNumCols (), k_vals,
1715  myGraph_->lclGraph_);
1716  }
1717 
1718  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1719  void
1721  fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1722  {
1723  using ::Tpetra::Details::ProfilingRegion;
1724  using Kokkos::create_mirror_view;
1725  using Teuchos::ArrayRCP;
1726  using Teuchos::Array;
1727  using Teuchos::null;
1728  using Teuchos::RCP;
1729  using Teuchos::rcp;
1730  typedef LocalOrdinal LO;
1731  typedef typename Graph::local_graph_type::row_map_type row_map_type;
1732  typedef typename row_map_type::non_const_type non_const_row_map_type;
1733  typedef typename local_matrix_type::values_type values_type;
1734 #ifdef HAVE_TPETRA_DEBUG
1735  const char tfecfFuncName[] = "fillLocalMatrix (called from fillComplete): ";
1736 #endif // HAVE_TPETRA_DEBUG
1737  ProfilingRegion regionFLM ("Tpetra::CrsMatrix::fillLocalMatrix");
1738 
1739  const size_t lclNumRows = getNodeNumRows();
1740 
1741  // The goals of this routine are first, to allocate and fill
1742  // packed 1-D storage (see below for an explanation) in the vals
1743  // array, and second, to give vals to the local matrix and
1744  // finalize the local matrix. We only need k_ptrs, the packed 1-D
1745  // row offsets, within the scope of this routine, since we're only
1746  // filling the local matrix here (use fillLocalGraphAndMatrix() to
1747  // fill both the graph and the matrix at the same time).
1748 
1749  // get data from staticGraph_
1750  ArrayRCP<Array<LO> > lclInds2D = staticGraph_->lclInds2D_;
1751  size_t nodeNumEntries = staticGraph_->getNodeNumEntries ();
1752  size_t nodeNumAllocated = staticGraph_->getNodeAllocationSize ();
1753  row_map_type k_rowPtrs_ = staticGraph_->lclGraph_.row_map;
1754 
1755  row_map_type k_ptrs; // "packed" row offsets array
1756  values_type k_vals; // "packed" values array
1757 
1758  // May we ditch the old allocations for the packed (and otherwise
1759  // "optimized") allocations, later in this routine? Request
1760  // optimized storage by default.
1761  bool requestOptimizedStorage = true;
1762  const bool default_OptimizeStorage =
1763  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1764  if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) {
1765  requestOptimizedStorage = false;
1766  }
1767  // If we're not allowed to change a static graph, then we can't
1768  // change the storage of the matrix, either. This means that if
1769  // the graph's storage isn't already optimized, we can't optimize
1770  // the matrix's storage either. Check and give warning, as
1771  // appropriate.
1772  if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) {
1773  TPETRA_ABUSE_WARNING(true, std::runtime_error,
1774  "You requested optimized storage by setting the"
1775  "\"Optimize Storage\" flag to \"true\" in the parameter list, or by virtue"
1776  "of default behavior. However, the associated CrsGraph was filled separately"
1777  "and requested not to optimize storage. Therefore, the CrsMatrix cannot"
1778  "optimize storage.");
1779  requestOptimizedStorage = false;
1780  }
1781 
1782  typedef decltype (staticGraph_->k_numRowEntries_) row_entries_type;
1783 
1784  if (getProfileType() != StaticProfile) {
1785  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1786  //
1787  // DynamicProfile means that the matrix's values are currently
1788  // stored in a 2-D "unpacked" format, in the array-of-arrays
1789  // values2D_. We allocate 1-D storage and then copy from 2-D
1790  // storage in values2D_ into 1-D storage in k_vals. Since we're
1791  // only allocating the local matrix here, not the local graph,
1792  // we don't need to keep the row offsets array, but we do need
1793  // it here temporarily in order to convert to 1-D storage. (The
1794  // allocStorage() function needs it.) We'll free ptrs later in
1795  // this method.
1796  //
1797  // FIXME (mfh 08 Aug 2014) If we're in this method, then the
1798  // graph should already have packed 1-D storage. Why can't we
1799  // just use the graph's current row offsets array?
1800 
1801  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1802  // the array of valid entry counts per row.
1803  //
1804  // Total number of entries in the matrix on the calling
1805  // process. We will compute this in the loop below. It's
1806  // cheap to compute and useful as a sanity check.
1807  size_t lclTotalNumEntries = 0;
1808  // This will be a host view of packed row offsets.
1809  typename non_const_row_map_type::HostMirror h_ptrs;
1810 
1811  typename row_entries_type::const_type numRowEnt_h =
1812  staticGraph_->k_numRowEntries_;
1813  {
1814  non_const_row_map_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1815  lclNumRows+1);
1816  // NOTE (mfh 27 Jun 2016) We need h_ptrs on host anyway, so
1817  // let's just compute offsets on host.
1818  h_ptrs = create_mirror_view (packedRowOffsets);
1820  lclTotalNumEntries = computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
1821  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1822  k_ptrs = packedRowOffsets;
1823  }
1824 
1825 #ifdef HAVE_TPETRA_DEBUG
1826  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1827  (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1828  std::logic_error, "In DynamicProfile branch, after packing k_ptrs, "
1829  "k_ptrs.extent(0) = " << k_ptrs.extent (0) << " != "
1830  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1831  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1832  (static_cast<size_t> (h_ptrs.extent (0)) != lclNumRows + 1,
1833  std::logic_error, "In DynamicProfile branch, after packing h_ptrs, "
1834  "h_ptrs.extent(0) = " << h_ptrs.extent (0) << " != "
1835  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1836  {
1837  const auto valToCheck = ::Tpetra::Details::getEntryOnHost (k_ptrs, lclNumRows);
1838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1839  (static_cast<size_t> (valToCheck) != lclTotalNumEntries,
1840  std::logic_error, "(DynamicProfile branch) After packing k_ptrs, "
1841  "k_ptrs(lclNumRows = " << lclNumRows << ") = " << valToCheck
1842  << " != total number of entries on the calling process = "
1843  << lclTotalNumEntries << ".");
1844  }
1845 #endif // HAVE_TPETRA_DEBUG
1846 
1847  // Allocate the array of packed values.
1848  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1849  // We need a host view of the above, since 2-D storage lives on host.
1850  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1851  // Pack the values on the host.
1852  for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1853  const size_t numEnt = numRowEnt_h(lclRow);
1854  std::copy (values2D_[lclRow].begin(),
1855  values2D_[lclRow].begin() + numEnt,
1856  h_vals.data() + h_ptrs(lclRow));
1857  }
1858  // Copy the packed values to the device.
1859  Kokkos::deep_copy (k_vals, h_vals);
1860 
1861 #ifdef HAVE_TPETRA_DEBUG
1862  // Sanity check of packed row offsets.
1863  if (k_ptrs.extent (0) != 0) {
1864  const size_t numOffsets = static_cast<size_t> (k_ptrs.extent (0));
1865  const auto valToCheck =
1866  ::Tpetra::Details::getEntryOnHost (k_ptrs, numOffsets - 1);
1867  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1868  (static_cast<size_t> (valToCheck) != k_vals.extent (0),
1869  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1870  << (numOffsets-1) << ") = " << valToCheck << " != "
1871  "k_vals.extent(0) = " << k_vals.extent (0) << ".");
1872  }
1873 #endif // HAVE_TPETRA_DEBUG
1874  }
1875  else if (getProfileType () == StaticProfile) {
1876  // StaticProfile means that the matrix's values are currently
1877  // stored in a 1-D format. However, this format is "unpacked";
1878  // it doesn't necessarily have the same row offsets as indicated
1879  // by the ptrs array returned by allocRowPtrs. This could
1880  // happen, for example, if the user specified StaticProfile in
1881  // the constructor and fixed the number of matrix entries in
1882  // each row, but didn't fill all those entries.
1883  //
1884  // As above, we don't need to keep the "packed" row offsets
1885  // array ptrs here, but we do need it here temporarily, so we
1886  // have to allocate it. We'll free ptrs later in this method.
1887  //
1888  // Note that this routine checks whether storage has already
1889  // been packed. This is a common case for solution of nonlinear
1890  // PDEs using the finite element method, as long as the
1891  // structure of the sparse matrix does not change between linear
1892  // solves.
1893  if (nodeNumEntries != nodeNumAllocated) {
1894  // We have to pack the 1-D storage, since the user didn't fill
1895  // up all requested storage.
1896  non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1897  lclNumRows+1);
1898  // Total number of entries in the matrix on the calling
1899  // process. We will compute this in the loop below. It's
1900  // cheap to compute and useful as a sanity check.
1901  size_t lclTotalNumEntries = 0;
1902  k_ptrs = tmpk_ptrs;
1903  {
1904  typename row_entries_type::const_type numRowEnt_d =
1905  staticGraph_->k_numRowEntries_;
1907  // This function can handle the counts being a host View.
1908  lclTotalNumEntries = computeOffsetsFromCounts (tmpk_ptrs, numRowEnt_d);
1909  }
1910 
1911  // Allocate the "packed" values array.
1912  // It has exactly the right number of entries.
1913  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1914 
1915  // Pack k_values1D_ into k_vals. We will replace k_values1D_ below.
1916  typedef pack_functor<values_type, row_map_type> packer_type;
1917  packer_type valsPacker (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_);
1918 
1919  typedef typename decltype (k_vals)::execution_space exec_space;
1920  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
1921  Kokkos::parallel_for (range_type (0, lclNumRows), valsPacker);
1922  }
1923  else { // We don't have to pack, so just set the pointer.
1924  k_vals = k_values1D_;
1925  }
1926  }
1927 
1928  // May we ditch the old allocations for the packed one?
1929  if (requestOptimizedStorage) {
1930  // The user requested optimized storage, so we can dump the
1931  // unpacked 2-D and 1-D storage, and keep the packed storage.
1932  values2D_ = null;
1933  k_values1D_ = k_vals;
1934  this->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
1935  }
1936 
1937  // Build the local sparse matrix object. At this point, the local
1938  // matrix certainly has a column Map. Remember that the local
1939  // matrix's number of columns comes from the column Map, not the
1940  // domain Map.
1941  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1942  getColMap ()->getNodeNumElements (),
1943  k_vals,
1944  staticGraph_->getLocalGraph ());
1945  }
1946 
// insertIndicesAndValues: insert column indices into one row of the
// graph, then copy the corresponding new values into that row's values.
//
// graph:      receives the indices through graph.insertIndices().
// rowInfo:    row descriptor.  rowInfo.numEntries is read BEFORE the
//             insertion and is the offset where the new values are written.
// newInds:    indices to insert; forwarded unchanged to insertIndices().
// oldRowVals: writable view of the row's values.
// newRowVals: values to copy; only the first numInserted are copied,
//             where numInserted is what insertIndices() returned.
// lg, I:      forwarded unchanged to graph.insertIndices().
//
// NOTE(review): nothing here checks that oldRowVals has room for
// oldNumEnt + numInserted entries; callers presumably size the row
// beforehand -- confirm.
1947  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1948  void
1950  insertIndicesAndValues (crs_graph_type& graph,
1951  RowInfo& rowInfo,
1952  const typename crs_graph_type::SLocalGlobalViews& newInds,
1953  const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1954  const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1955  const ELocalGlobal lg,
1956  const ELocalGlobal I)
1957  {
      // Capture the entry count before insertIndices() runs; it is used
      // below as the write offset for the new values.
1958  const size_t oldNumEnt = rowInfo.numEntries;
1959  const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
1960 
1961  // Use of memcpy here works around an issue with GCC >= 4.9.0,
1962  // that probably relates to scalar_type vs. impl_scalar_type
1963  // aliasing. See history of Tpetra_CrsGraph_def.hpp for
1964  // details; look for GCC_WORKAROUND macro definition.
      //
      // The numInserted > 0 guard also keeps &newRowVals[0] from being
      // evaluated when nothing was inserted.
1965  if (numInserted > 0) {
1966  const size_t startOffset = oldNumEnt;
1967  memcpy (&oldRowVals[startOffset], &newRowVals[0],
1968  numInserted * sizeof (impl_scalar_type));
1969  }
1970  }
1971 
// insertLocalValues (ArrayView overload): insert entries into the
// locally indexed row lclRow.
//
// lclRow:  local row index; must satisfy rowMap_->isNodeLocalElement().
// indices: local column indices of the entries.
// values:  values of the entries; must have the same size as indices.
//
// The precondition checks below use std::runtime_error for: fill not
// active, static graph, missing column Map, globally indexed graph,
// values/indices size mismatch, and a row not owned by this process.
// In HAVE_TPETRA_DEBUG builds, any index not in the column Map is
// reported with std::invalid_argument.
//
// If the graph's indices are not yet allocated, allocateValues() is
// called first.  The insertion itself differs by profile type:
//   - StaticProfile: graph.insertLocalIndicesImpl() is given a callback
//     that ADDS values[k] into valsView[offset] (note the +=).
//   - otherwise: the row's 2-D storage is grown if needed, and the work
//     is handed to insertIndicesAndValues().
1972  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1973  void
1975  insertLocalValues (const LocalOrdinal lclRow,
1976  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1977  const Teuchos::ArrayView<const Scalar>& values)
1978  {
1979  using std::endl;
1980  typedef impl_scalar_type IST;
1981  const char tfecfFuncName[] = "insertLocalValues: ";
1982 
1983  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1984  (! this->isFillActive (), std::runtime_error,
1985  "Fill is not active. After calling fillComplete, you must call "
1986  "resumeFill before you may insert entries into the matrix again.");
1987  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1988  (this->isStaticGraph (), std::runtime_error,
1989  "Cannot insert indices with static graph; use replaceLocalValues() "
1990  "instead.");
1991  // At this point, we know that myGraph_ is nonnull.
1992  crs_graph_type& graph = * (this->myGraph_);
1993  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1994  (graph.colMap_.is_null (), std::runtime_error,
1995  "Cannot insert local indices without a column map.");
1996  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1997  (graph.isGloballyIndexed (),
1998  std::runtime_error, "Graph indices are global; use "
1999  "insertGlobalValues().");
2000  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2001  (values.size () != indices.size (), std::runtime_error,
2002  "values.size() = " << values.size ()
2003  << " != indices.size() = " << indices.size () << ".");
2004  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2005  ! graph.rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
2006  "Local row index " << lclRow << " does not belong to this process.");
2007 
      // Lazy allocation: storage is created on the first insertion.
2008  if (! graph.indicesAreAllocated ()) {
2009  this->allocateValues (LocalIndices, GraphNotYetAllocated);
2010  }
2011 
2012  const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
2013 #ifdef HAVE_TPETRA_DEBUG
2014  // In a debug build, test whether any of the given column indices
2015  // are not in the column Map. Keep track of the invalid column
2016  // indices so we can tell the user about them.
2017  {
2018  using Teuchos::toString;
2019 
2020  const map_type& colMap = * (graph.colMap_);
2021  Teuchos::Array<LocalOrdinal> badColInds;
2022  bool allInColMap = true;
2023  for (size_t k = 0; k < numEntriesToAdd; ++k) {
2024  if (! colMap.isNodeLocalElement (indices[k])) {
2025  allInColMap = false;
2026  badColInds.push_back (indices[k]);
2027  }
2028  }
2029  if (! allInColMap) {
2030  std::ostringstream os;
2031  os << "You attempted to insert entries in owned row " << lclRow
2032  << ", at the following column indices: " << toString (indices)
2033  << "." << endl;
2034  os << "Of those, the following indices are not in the column Map on "
2035  "this process: " << toString (badColInds) << "." << endl << "Since "
2036  "the matrix has a column Map already, it is invalid to insert "
2037  "entries at those locations.";
2038  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2039  (true, std::invalid_argument, os.str ());
2040  }
2041  }
2042 #endif // HAVE_TPETRA_DEBUG
2043 
2044  RowInfo rowInfo = graph.getRowInfo (lclRow);
2045 
2046  if (this->getProfileType() == StaticProfile)
2047  {
      // The callback receives (k, start, offset): k indexes the input
      // arrays and offset indexes valsView.  It accumulates (+=) rather
      // than assigns.  NOTE(review): presumably insertLocalIndicesImpl
      // calls it once per input index, with the offset where that index
      // lives in the row -- confirm against CrsGraph.
2048  Teuchos::ArrayView<IST> valsView = this->getViewNonConst(rowInfo);
2049  auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset) {
2050  valsView[offset] += values[k]; };
      // std::ref wraps the lambda so std::function refers to it instead
      // of copying it; fun outlives cb within this scope.
2051  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2052  graph.insertLocalIndicesImpl(lclRow, indices, cb);
2053  }
2054  else
2055  {
2056  // NOTE (DYNAMICPROFILE_REMOVAL) (tjf Mar 2019) Remove with DynamicProfile
2057  const size_t curNumEnt = rowInfo.numEntries;
2058  const size_t newNumEnt = curNumEnt + numEntriesToAdd;
2059  if (newNumEnt > rowInfo.allocSize) {
2060  // This must be a nonconst reference, since we'll reallocate.
2061  Teuchos::Array<IST>& curVals = this->values2D_[lclRow];
2062  // Make space for the new matrix entries.
2063  // curVals is a Teuchos::Array (not an ArrayRCP); its resize
2064  // keeps the existing values on reallocation.
2065  graph.lclInds2D_[rowInfo.localRow].resize (newNumEnt);
2066  curVals.resize (newNumEnt);
2067  rowInfo.allocSize = newNumEnt; // give rowInfo updated allocSize
2068  }
2069  typename crs_graph_type::SLocalGlobalViews indsView;
2070  indsView.linds = indices;
2071 
      // Fetch the values view only after the possible resize above.
2072  Teuchos::ArrayView<IST> valsView = this->getViewNonConst (rowInfo);
2073  Teuchos::ArrayView<const IST> valsIn =
2074  Teuchos::av_reinterpret_cast<const IST> (values);
2075  this->insertIndicesAndValues (graph, rowInfo, indsView, valsView,
2076  valsIn, LocalIndices, LocalIndices);
2077 #ifdef HAVE_TPETRA_DEBUG
2078  const size_t chkNewNumEnt = graph.getNumEntriesInLocalRow (lclRow);
2079  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2080  (chkNewNumEnt != newNumEnt, std::logic_error,
2081  "The row should have " << newNumEnt << " entries after insert, but "
2082  "instead has " << chkNewNumEnt << ". Please report this bug to "
2083  "the Tpetra developers.");
2084 #endif // HAVE_TPETRA_DEBUG
2085  }
2086  }
2087 
// insertLocalValues (raw-array overload): wraps cols and vals, each
// numEnt entries long, in Teuchos::ArrayView objects and forwards to the
// ArrayView overload.
//
// Note the argument order: this overload takes (vals, cols), while the
// ArrayView overload it calls takes (indices, values).
2088  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2089  void
2091  insertLocalValues (const LocalOrdinal localRow,
2092  const LocalOrdinal numEnt,
2093  const Scalar vals[],
2094  const LocalOrdinal cols[])
2095  {
2096  Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
2097  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2098  this->insertLocalValues (localRow, colsT, valsT);
2099  }
2100 
// insertGlobalValuesImpl: insert entries, given by global column index,
// into the row described by rowInfo.
//
// graph:       graph that receives the column indices.
// rowInfo:     in/out row descriptor.  It is re-fetched if this call
//              triggers allocateValues(), and its allocSize is updated
//              when the non-StaticProfile branch grows the row.
// gblColInds:  numInputEnt global column indices.
// vals:        numInputEnt values, matching gblColInds.
// numInputEnt: number of input entries.
//
// StaticProfile: graph.insertGlobalIndicesImpl() is given a callback
// that ADDS vals[k] into valsView[offset] (note the +=); the expected
// row length is curNumEnt plus the count it returns.
// Otherwise: the row's 2-D storage is grown if needed and the work is
// handed to insertIndicesAndValues(); the expected row length is
// curNumEnt + numInputEnt.
//
// In HAVE_TPETRA_DEBUG builds, the expected row length is compared with
// graph.getNumEntriesInLocalRow(); a mismatch is reported as
// std::logic_error with a dump of the inputs and, where available, the
// row's current contents.
2101  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2102  void
2104  insertGlobalValuesImpl (crs_graph_type& graph,
2105  RowInfo& rowInfo,
2106  const GlobalOrdinal gblColInds[],
2107  const impl_scalar_type vals[],
2108  const size_t numInputEnt)
2109  {
2110  typedef impl_scalar_type IST;
2111  typedef GlobalOrdinal GO;
2112 #ifdef HAVE_TPETRA_DEBUG
2113  const char tfecfFuncName[] = "insertGlobalValuesImpl: ";
2114  const size_t origNumEnt = graph.getNumEntriesInLocalRow (rowInfo.localRow);
2115 #endif // HAVE_TPETRA_DEBUG
2116 
2117  size_t newNumEnt = 0;
      // NOTE(review): curNumEnt is read before the possible rowInfo
      // refresh just below; presumably a not-yet-allocated row reports
      // the same entry count before and after allocation -- confirm.
2118  const size_t curNumEnt = rowInfo.numEntries;
2119 
2120  if (! graph.indicesAreAllocated ()) {
2121  this->allocateValues (GlobalIndices, GraphNotYetAllocated);
2122  // mfh 23 Jul 2017: allocateValues invalidates existing
2123  // getRowInfo results. Once we get rid of lazy graph
2124  // allocation, we'll be able to move the getRowInfo call outside
2125  // of this method.
2126  rowInfo = graph.getRowInfo (rowInfo.localRow);
2127  }
2128 
2129  if (this->getProfileType () == StaticProfile) {
      // The callback receives (k, start, offset): k indexes vals and
      // offset indexes valsView.  It accumulates (+=) rather than assigns.
2130  Teuchos::ArrayView<IST> valsView = this->getViewNonConst(rowInfo);
2131  auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset) {
2132  valsView[offset] += vals[k]; };
      // std::ref wraps the lambda so std::function refers to it instead
      // of copying it; fun outlives cb within this scope.
2133  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2134  auto numInserted =
2135  graph.insertGlobalIndicesImpl(rowInfo, gblColInds, numInputEnt, cb);
2136  newNumEnt = curNumEnt + numInserted;
2137  }
2138  else {
2139  // NOTE (DYNAMICPROFILE_REMOVAL) remove this block
2140  newNumEnt = curNumEnt + numInputEnt;
2141  if (newNumEnt > rowInfo.allocSize) {
2142  // This needs to be a nonconst reference, in case we want to
2143  // reallocate it.
2144  Teuchos::Array<IST>& curVals = this->values2D_[rowInfo.localRow];
2145  // curVals is a Teuchos::Array (not an ArrayRCP); its resize
2146  // keeps the existing values on reallocation.
2147  graph.gblInds2D_[rowInfo.localRow].resize (newNumEnt);
2148  curVals.resize (newNumEnt);
2149  rowInfo.allocSize = newNumEnt; // reassign for updated allocSize
2150  }
2151 
2152  using Teuchos::ArrayView;
2153  typename crs_graph_type::SLocalGlobalViews inputIndsAV;
2154  inputIndsAV.ginds = ArrayView<const GO> (gblColInds, numInputEnt);
      // Fetch the values view only after the possible resize above.
2155  ArrayView<IST> curValsAV = this->getViewNonConst (rowInfo);
2156  ArrayView<const IST> inputValsAV (vals, numInputEnt);
2157 
2158  const ELocalGlobal curIndexingStatus =
2159  this->isGloballyIndexed () ? GlobalIndices : LocalIndices;
2160  // curIndexingStatus == GlobalIndices means the method calls
2161  // getGlobalViewNonConst() and does direct copying, which should
2162  // be reasonably fast. LocalIndices means the method calls the
2163  // Map's getLocalElement() method once per entry to insert. This
2164  // may be slow.
2165  this->insertIndicesAndValues (graph, rowInfo, inputIndsAV, curValsAV,
2166  inputValsAV, GlobalIndices,
2167  curIndexingStatus);
2168  }
2169 
2170 #ifdef HAVE_TPETRA_DEBUG
      // Debug-only consistency check: the graph's row length must match
      // the count computed above.  On mismatch, build a detailed report.
2171  const size_t chkNewNumEnt =
2172  graph.getNumEntriesInLocalRow (rowInfo.localRow);
2173  if (chkNewNumEnt != newNumEnt) {
2174  std::ostringstream os;
2175  os << std::endl << "newNumEnt = " << newNumEnt
2176  << " != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
2177  << ") = " << chkNewNumEnt << "." << std::endl
2178  << "\torigNumEnt: " << origNumEnt << std::endl
2179  << "\tnumInputEnt: " << numInputEnt << std::endl
2180  << "\tgblColInds: [";
2181  for (size_t k = 0; k < numInputEnt; ++k) {
2182  os << gblColInds[k];
2183  if (k + size_t (1) < numInputEnt) {
2184  os << ",";
2185  }
2186  }
2187  os << "]" << std::endl
2188  << "\tvals: [";
2189  for (size_t k = 0; k < numInputEnt; ++k) {
2190  os << vals[k];
2191  if (k + size_t (1) < numInputEnt) {
2192  os << ",";
2193  }
2194  }
2195  os << "]" << std::endl;
2196 
      // If row views are supported, also print the row's current contents.
2197  if (this->supportsRowViews ()) {
2198  Teuchos::ArrayView<const Scalar> vals2;
2199  if (this->isGloballyIndexed ()) {
2200  Teuchos::ArrayView<const GlobalOrdinal> gblColInds2;
2201  const GlobalOrdinal gblRow =
2202  graph.rowMap_->getGlobalElement (rowInfo.localRow);
2203  if (gblRow == Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
2204  os << "Local row index " << rowInfo.localRow << " is invalid!" << std::endl;
2205  }
2206  else {
      // getGlobalRowView may itself throw; catch it so that the
      // original mismatch is still the error that gets reported.
2207  bool getViewThrew = false;
2208  try {
2209  this->getGlobalRowView (gblRow, gblColInds2, vals2);
2210  }
2211  catch (std::exception& e) {
2212  getViewThrew = true;
2213  os << "getGlobalRowView threw exception:" << std::endl
2214  << e.what () << std::endl;
2215  }
2216  if (! getViewThrew) {
2217  os << "\tNew global column indices: "
2218  << Teuchos::toString (gblColInds2) << std::endl
2219  << "\tNew values: " << Teuchos::toString (vals2) << std::endl;
2220  }
2221  }
2222  }
2223  else if (this->isLocallyIndexed ()) {
2224  Teuchos::ArrayView<const LocalOrdinal> lclColInds2;
2225  this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
2226  os << "\tNew local column indices: " << Teuchos::toString (lclColInds2)
2227  << std::endl;
2228  os << "\tNew values: " << Teuchos::toString (vals2) << std::endl;
2229  }
2230  }
2231 
2232  os << "Please report this bug to the Tpetra developers.";
2233  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2234  (true, std::logic_error, os.str ());
2235  }
2236 #endif // HAVE_TPETRA_DEBUG
2237  }
2238 
2239  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2240  void
2242  insertGlobalValues (const GlobalOrdinal gblRow,
2243  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2244  const Teuchos::ArrayView<const Scalar>& values)
2245  {
2246  using Teuchos::toString;
2247  using std::endl;
2248  typedef impl_scalar_type IST;
2249  typedef LocalOrdinal LO;
2250  typedef GlobalOrdinal GO;
2251  typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2252  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
2253  const char tfecfFuncName[] = "insertGlobalValues: ";
2254 
2255 #ifdef HAVE_TPETRA_DEBUG
2256  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2257  (values.size () != indices.size (), std::runtime_error,
2258  "values.size() = " << values.size () << " != indices.size() = "
2259  << indices.size () << ".");
2260 #endif // HAVE_TPETRA_DEBUG
2261 
2262  // getRowMap() is not thread safe, because it increments RCP's
2263  // reference count. getCrsGraphRef() is thread safe.
2264  const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2265  const LO lclRow = rowMap.getLocalElement (gblRow);
2266 
2267  if (lclRow == OTLO::invalid ()) {
2268  // Input row is _not_ owned by the calling process.
2269  //
2270  // See a note (now deleted) from mfh 14 Dec 2012: If input row
2271  // is not in the row Map, it doesn't matter whether or not the
2272  // graph is static; the data just get stashed for later use by
2273  // globalAssemble().
2274  this->insertNonownedGlobalValues (gblRow, indices, values);
2275  }
2276  else { // Input row _is_ owned by the calling process
2277  if (this->isStaticGraph ()) {
2278  // Uh oh! Not allowed to insert into owned rows in that case.
2279  const int myRank = rowMap.getComm ()->getRank ();
2280  const int numProcs = rowMap.getComm ()->getSize ();
2281  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2282  (true, std::runtime_error,
2283  "The matrix was constructed with a constant (\"static\") graph, "
2284  "yet the given global row index " << gblRow << " is in the row "
2285  "Map on the calling process (with rank " << myRank << ", of " <<
2286  numProcs << " process(es)). In this case, you may not insert "
2287  "new entries into rows owned by the calling process.");
2288  }
2289 
2290  crs_graph_type& graph = * (this->myGraph_);
2291  const IST* const inputVals =
2292  reinterpret_cast<const IST*> (values.getRawPtr ());
2293  const GO* const inputGblColInds = indices.getRawPtr ();
2294  const size_t numInputEnt = indices.size ();
2295  RowInfo rowInfo = graph.getRowInfo (lclRow);
2296 
2297  // If the matrix has a column Map, check at this point whether
2298  // the column indices belong to the column Map.
2299  //
2300  // FIXME (mfh 16 May 2013) We may want to consider deferring the
2301  // test to the CrsGraph method, since it may have to do this
2302  // anyway.
2303  if (! graph.colMap_.is_null ()) {
2304  const map_type& colMap = * (graph.colMap_);
2305  // In a debug build, keep track of the nonowned ("bad") column
2306  // indices, so that we can display them in the exception
2307  // message. In a release build, just ditch the loop early if
2308  // we encounter a nonowned column index.
2309 #ifdef HAVE_TPETRA_DEBUG
2310  Teuchos::Array<GO> badColInds;
2311 #endif // HAVE_TPETRA_DEBUG
2312  const size_type numEntriesToInsert = indices.size ();
2313  bool allInColMap = true;
2314  for (size_type k = 0; k < numEntriesToInsert; ++k) {
2315  if (! colMap.isNodeGlobalElement (indices[k])) {
2316  allInColMap = false;
2317 #ifdef HAVE_TPETRA_DEBUG
2318  badColInds.push_back (indices[k]);
2319 #else
2320  break;
2321 #endif // HAVE_TPETRA_DEBUG
2322  }
2323  }
2324  if (! allInColMap) {
2325  std::ostringstream os;
2326  os << "You attempted to insert entries in owned row " << gblRow
2327  << ", at the following column indices: " << toString (indices)
2328  << "." << endl;
2329 #ifdef HAVE_TPETRA_DEBUG
2330  os << "Of those, the following indices are not in the column Map "
2331  "on this process: " << toString (badColInds) << "." << endl
2332  << "Since the matrix has a column Map already, it is invalid "
2333  "to insert entries at those locations.";
2334 #else
2335  os << "At least one of those indices is not in the column Map "
2336  "on this process." << endl << "It is invalid to insert into "
2337  "columns not in the column Map on the process that owns the "
2338  "row.";
2339 #endif // HAVE_TPETRA_DEBUG
2340  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2341  (true, std::invalid_argument, os.str ());
2342  }
2343  }
2344 
2345  this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2346  inputVals, numInputEnt);
2347  }
2348  }
2349 
2350 
2351  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2352  void
2354  insertGlobalValues (const GlobalOrdinal globalRow,
2355  const LocalOrdinal numEnt,
2356  const Scalar vals[],
2357  const GlobalOrdinal inds[])
2358  {
2359  Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2360  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2361  this->insertGlobalValues (globalRow, indsT, valsT);
2362  }
2363 
2364 
2365  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2366  void
2368  insertGlobalValuesFiltered (const GlobalOrdinal gblRow,
2369  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2370  const Teuchos::ArrayView<const Scalar>& values)
2371  {
2372  typedef impl_scalar_type IST;
2373  typedef LocalOrdinal LO;
2374  typedef GlobalOrdinal GO;
2375  typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2376  const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
2377 
2378 #ifdef HAVE_TPETRA_DEBUG
2379  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2380  (values.size () != indices.size (), std::runtime_error,
2381  "values.size() = " << values.size () << " != indices.size() = "
2382  << indices.size () << ".");
2383 #endif // HAVE_TPETRA_DEBUG
2384 
2385  // getRowMap() is not thread safe, because it increments RCP's
2386  // reference count. getCrsGraphRef() is thread safe.
2387  const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2388  const LO lclRow = rowMap.getLocalElement (gblRow);
2389 
2390  if (lclRow == OTLO::invalid ()) {
2391  // Input row is _not_ owned by the calling process.
2392  //
2393  // See a note (now deleted) from mfh 14 Dec 2012: If input row
2394  // is not in the row Map, it doesn't matter whether or not the
2395  // graph is static; the data just get stashed for later use by
2396  // globalAssemble().
2397  this->insertNonownedGlobalValues (gblRow, indices, values);
2398  }
2399  else { // Input row _is_ owned by the calling process
2400  if (this->isStaticGraph ()) {
2401  // Uh oh! Not allowed to insert into owned rows in that case.
2402  const int myRank = rowMap.getComm ()->getRank ();
2403  const int numProcs = rowMap.getComm ()->getSize ();
2404  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2405  (true, std::runtime_error,
2406  "The matrix was constructed with a constant (\"static\") graph, "
2407  "yet the given global row index " << gblRow << " is in the row "
2408  "Map on the calling process (with rank " << myRank << ", of " <<
2409  numProcs << " process(es)). In this case, you may not insert "
2410  "new entries into rows owned by the calling process.");
2411  }
2412 
2413  crs_graph_type& graph = * (this->myGraph_);
2414  const IST* const inputVals =
2415  reinterpret_cast<const IST*> (values.getRawPtr ());
2416  const GO* const inputGblColInds = indices.getRawPtr ();
2417  const size_t numInputEnt = indices.size ();
2418  RowInfo rowInfo = graph.getRowInfo (lclRow);
2419 
2420  if (! graph.colMap_.is_null ()) { // We have a column Map.
2421  const map_type& colMap = * (graph.colMap_);
2422  size_t curOffset = 0;
2423  while (curOffset < numInputEnt) {
2424  // Find a sequence of input indices that are in the column
2425  // Map on the calling process. Doing a sequence at a time,
2426  // instead of one at a time, amortizes some overhead.
2427  size_t endOffset = curOffset;
2428  for ( ; endOffset < numInputEnt &&
2429  colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2430  ++endOffset)
2431  {}
2432  // curOffset, endOffset: half-exclusive range of indices in
2433  // the column Map on the calling process. If endOffset ==
2434  // curOffset, the range is empty.
2435  const LO numIndInSeq = (endOffset - curOffset);
2436  if (numIndInSeq != 0) {
2437  this->insertGlobalValuesImpl (graph, rowInfo,
2438  inputGblColInds + curOffset,
2439  inputVals + curOffset,
2440  numIndInSeq);
2441  }
2442  // Invariant before the increment line: Either endOffset ==
2443  // numInputEnt, or inputGblColInds[endOffset] is not in the
2444  // column Map on the calling process.
2445 #ifdef HAVE_TPETRA_DEBUG
2446  const bool invariant = endOffset == numInputEnt ||
2447  colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2448  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2449  (! invariant, std::logic_error, std::endl << "Invariant failed!");
2450 #endif // HAVE_TPETRA_DEBUG
2451  curOffset = endOffset + 1;
2452  }
2453  }
2454  else { // we don't have a column Map.
2455  this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2456  inputVals, numInputEnt);
2457  }
2458  }
2459  }
2460 
2461  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2462  LocalOrdinal
2463  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2464  replaceLocalValuesImpl (impl_scalar_type rowVals[],
2465  const crs_graph_type& graph,
2466  const RowInfo& rowInfo,
2467  const LocalOrdinal inds[],
2468  const impl_scalar_type newVals[],
2469  const LocalOrdinal numElts) const
2470  {
2471  if (graph.getProfileType() == StaticProfile)
2472  {
2473  Teuchos::ArrayView<const LocalOrdinal> indsT(inds, numElts);
2474  auto fun =
2475  [&](size_t const k, size_t const /*start*/, size_t const offset) {
2476  rowVals[offset] = newVals[k];
2477  };
2478  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2479  return graph.findLocalIndices(rowInfo, indsT, cb);
2480  }
2481 
2482  // NOTE (DYNAMICPROFILE_REMOVAL) (tjf Mar 2019) from this point down can be
2483  // yanked once DynamicProfile is removed.
2484  typedef LocalOrdinal LO;
2485  typedef GlobalOrdinal GO;
2486  const bool sorted = graph.isSorted ();
2487 
2488  size_t hint = 0; // Guess for the current index k into rowVals
2489  LO numValid = 0; // number of valid local column indices
2490 
2491  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2492  // accurately, it assumes that the host execution space can
2493  // access data in both InputMemorySpace and ValsMemorySpace.
2494 
2495  if (graph.isLocallyIndexed ()) {
2496  // Get a view of the column indices in the row. This amortizes
2497  // the cost of getting the view over all the entries of inds.
2498  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2499 
2500  for (LO j = 0; j < numElts; ++j) {
2501  const LO lclColInd = inds[j];
2502  const size_t offset =
2503  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2504  lclColInd, hint, sorted);
2505  if (offset != rowInfo.numEntries) {
2506  rowVals[offset] = newVals[j];
2507  hint = offset + 1;
2508  ++numValid;
2509  }
2510  }
2511  }
2512  else if (graph.isGloballyIndexed ()) {
2513  if (graph.colMap_.is_null ()) {
2514  return Teuchos::OrdinalTraits<LO>::invalid ();
2515  }
2516  const map_type colMap = * (graph.colMap_);
2517 
2518  // Get a view of the column indices in the row. This amortizes
2519  // the cost of getting the view over all the entries of inds.
2520  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2521 
2522  for (LO j = 0; j < numElts; ++j) {
2523  const GO gblColInd = colMap.getGlobalElement (inds[j]);
2524  if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2525  const size_t offset =
2526  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2527  gblColInd, hint, sorted);
2528  if (offset != rowInfo.numEntries) {
2529  rowVals[offset] = newVals[j];
2530  hint = offset + 1;
2531  ++numValid;
2532  }
2533  }
2534  }
2535  }
2536  // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
2537  // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
2538  // to be neither locally nor globally indexed on a process.
2539  // This means that the graph or matrix has no entries on that
2540  // process. Epetra also works like this. It's related to lazy
2541  // allocation (on first insertion, not at graph / matrix
2542  // construction). Lazy allocation will go away because it is
2543  // not thread scalable.
2544 
2545  return numValid;
2546  }
2547 
2548  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2549  LocalOrdinal
2551  replaceLocalValues (const LocalOrdinal localRow,
2552  const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2553  const Teuchos::ArrayView<const Scalar>& vals) const
2554  {
2555  typedef LocalOrdinal LO;
2556 
2557  const LO numInputEnt = static_cast<LO> (lclCols.size ());
2558  if (static_cast<LO> (vals.size ()) != numInputEnt) {
2559  return Teuchos::OrdinalTraits<LO>::invalid ();
2560  }
2561  const LO* const inputInds = lclCols.getRawPtr ();
2562  const Scalar* const inputVals = vals.getRawPtr ();
2563  return this->replaceLocalValues (localRow, numInputEnt,
2564  inputVals, inputInds);
2565  }
2566 
2567  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2568  LocalOrdinal
2570  replaceLocalValues (const LocalOrdinal localRow,
2571  const LocalOrdinal numEnt,
2572  const Scalar inputVals[],
2573  const LocalOrdinal inputCols[]) const
2574  {
2575  typedef impl_scalar_type IST;
2576  typedef LocalOrdinal LO;
2577 
2578  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2579  // Fill must be active and the "nonconst" graph must exist.
2580  return Teuchos::OrdinalTraits<LO>::invalid ();
2581  }
2582  const crs_graph_type& graph = * (this->staticGraph_);
2583  const RowInfo rowInfo = graph.getRowInfo (localRow);
2584 
2585  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2586  // The calling process does not own this row, so it is not
2587  // allowed to modify its values.
2588  return static_cast<LO> (0);
2589  }
2590  auto curRowVals = this->getRowViewNonConst (rowInfo);
2591  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2592  return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2593  inputCols, inVals, numEnt);
2594  }
2595 
2596  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2597  LocalOrdinal
2599  replaceGlobalValuesImpl (impl_scalar_type rowVals[],
2600  const crs_graph_type& graph,
2601  const RowInfo& rowInfo,
2602  const GlobalOrdinal inds[],
2603  const impl_scalar_type newVals[],
2604  const LocalOrdinal numElts) const
2605  {
2606  if (graph.getProfileType() == StaticProfile)
2607  {
2608  Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
2609  auto fun =
2610  [&](size_t const k, size_t const /*start*/, size_t const offset) {
2611  rowVals[offset] = newVals[k];
2612  };
2613  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2614  return graph.findGlobalIndices(rowInfo, indsT, cb);
2615  }
2616 
2617  // NOTE (DYNAMICPROFILE_REMOVAL) (tjf Mar 2019) from this point down can be
2618  // yanked once DynamicProfile is removed.
2619  typedef LocalOrdinal LO;
2620  typedef GlobalOrdinal GO;
2621 
2622  const bool sorted = graph.isSorted ();
2623 
2624  size_t hint = 0; // guess at the index's relative offset in the row
2625  LO numValid = 0; // number of valid input column indices
2626 
2627  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2628  // accurately, it assumes that the host execution space can
2629  // access data in all the Views.
2630 
2631  if (graph.isLocallyIndexed ()) {
2632  // NOTE (mfh 04 Nov 2015) Dereferencing an RCP or reading its
2633  // pointer does NOT change its reference count. Thus, this
2634  // code is still thread safe.
2635  if (graph.colMap_.is_null ()) {
2636  // NO input column indices are valid in this case, since if
2637  // the column Map is null on the calling process, then the
2638  // calling process owns no graph entries.
2639  return numValid;
2640  }
2641  const map_type& colMap = * (graph.colMap_);
2642 
2643  // Get a view of the column indices in the row. This amortizes
2644  // the cost of getting the view over all the entries of inds.
2645  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2646  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2647  for (LO j = 0; j < numElts; ++j) {
2648  const LO lclColInd = colMap.getLocalElement (inds[j]);
2649  if (lclColInd != LINV) {
2650  const size_t offset =
2651  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2652  lclColInd, hint, sorted);
2653  if (offset != rowInfo.numEntries) {
2654  rowVals[offset] = newVals[j];
2655  hint = offset + 1;
2656  numValid++;
2657  }
2658  }
2659  }
2660  }
2661  else if (graph.isGloballyIndexed ()) {
2662  // Get a view of the column indices in the row. This amortizes
2663  // the cost of getting the view over all the entries of inds.
2664  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2665 
2666  for (LO j = 0; j < numElts; ++j) {
2667  const GO gblColInd = inds[j];
2668  const size_t offset =
2669  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2670  gblColInd, hint, sorted);
2671  if (offset != rowInfo.numEntries) {
2672  rowVals[offset] = newVals[j];
2673  hint = offset + 1;
2674  numValid++;
2675  }
2676  }
2677  }
2678  // If the graph is neither locally nor globally indexed on the
2679  // calling process, that means the calling process has no graph
2680  // entries. Thus, none of the input column indices are valid.
2681 
2682  return numValid;
2683  }
2684 
2685  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2686  LocalOrdinal
2688  replaceGlobalValues (const GlobalOrdinal globalRow,
2689  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2690  const Teuchos::ArrayView<const Scalar>& inputVals) const
2691  {
2692  typedef LocalOrdinal LO;
2693 
2694  const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
2695  if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2696  return Teuchos::OrdinalTraits<LO>::invalid ();
2697  }
2698  return this->replaceGlobalValues (globalRow, numInputEnt,
2699  inputVals.getRawPtr (),
2700  inputGblColInds.getRawPtr ());
2701  }
2702 
2703  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2704  LocalOrdinal
2706  replaceGlobalValues (const GlobalOrdinal globalRow,
2707  const LocalOrdinal numEnt,
2708  const Scalar inputVals[],
2709  const GlobalOrdinal inputGblColInds[]) const
2710  {
2711  typedef impl_scalar_type IST;
2712  typedef LocalOrdinal LO;
2713 
2714  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2715  // Fill must be active and the "nonconst" graph must exist.
2716  return Teuchos::OrdinalTraits<LO>::invalid ();
2717  }
2718  const crs_graph_type& graph = * (this->staticGraph_);
2719 
2720  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (globalRow);
2721  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2722  // The input local row is invalid on the calling process,
2723  // which means that the calling process summed 0 entries.
2724  return static_cast<LO> (0);
2725  }
2726 
2727  auto curRowVals = this->getRowViewNonConst (rowInfo);
2728  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2729  return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2730  inputGblColInds, inVals, numEnt);
2731  }
2732 
2733  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2734  LocalOrdinal
2736  sumIntoGlobalValuesImpl (impl_scalar_type rowVals[],
2737  const crs_graph_type& graph,
2738  const RowInfo& rowInfo,
2739  const GlobalOrdinal inds[],
2740  const impl_scalar_type newVals[],
2741  const LocalOrdinal numElts,
2742  const bool atomic) const
2743  {
2744  if (graph.getProfileType() == StaticProfile)
2745  {
2746  Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
2747  auto fun =
2748  [&](size_t const k, size_t const /*start*/, size_t const offset) {
2749  if (atomic)
2750  Kokkos::atomic_add(&rowVals[offset], newVals[k]);
2751  else
2752  rowVals[offset] += newVals[k];
2753  };
2754  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2755  return graph.findGlobalIndices(rowInfo, indsT, cb);
2756  }
2757 
2758  // NOTE (DYNAMICPROFILE_REMOVAL) (tjf Mar 2019) from this point down can be
2759  // yanked once DynamicProfile is removed.
2760  typedef LocalOrdinal LO;
2761  typedef GlobalOrdinal GO;
2762 
2763  const bool sorted = graph.isSorted ();
2764 
2765  size_t hint = 0; // guess at the index's relative offset in the row
2766  LO numValid = 0; // number of valid input column indices
2767 
2768  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2769  // accurately, it assumes that the host execution space can
2770  // access data in both InputMemorySpace and ValsMemorySpace.
2771 
2772  if (graph.isLocallyIndexed ()) {
2773  // NOTE (mfh 04 Nov 2015) Dereferencing an RCP or reading its
2774  // pointer does NOT change its reference count. Thus, this
2775  // code is still thread safe.
2776  if (graph.colMap_.is_null ()) {
2777  // NO input column indices are valid in this case, since if
2778  // the column Map is null on the calling process, then the
2779  // calling process owns no graph entries.
2780  return numValid;
2781  }
2782  const map_type& colMap = * (graph.colMap_);
2783 
2784  // Get a view of the column indices in the row. This amortizes
2785  // the cost of getting the view over all the entries of inds.
2786  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2787  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2788 
2789  for (LO j = 0; j < numElts; ++j) {
2790  const LO lclColInd = colMap.getLocalElement (inds[j]);
2791  if (lclColInd != LINV) {
2792  const size_t offset =
2793  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2794  lclColInd, hint, sorted);
2795  if (offset != rowInfo.numEntries) {
2796  if (atomic) {
2797  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2798  }
2799  else {
2800  rowVals[offset] += newVals[j];
2801  }
2802  hint = offset + 1;
2803  numValid++;
2804  }
2805  }
2806  }
2807  }
2808  else if (graph.isGloballyIndexed ()) {
2809  // Get a view of the column indices in the row. This amortizes
2810  // the cost of getting the view over all the entries of inds.
2811  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2812 
2813  for (LO j = 0; j < numElts; ++j) {
2814  const GO gblColInd = inds[j];
2815  const size_t offset =
2816  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2817  gblColInd, hint, sorted);
2818  if (offset != rowInfo.numEntries) {
2819  if (atomic) {
2820  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2821  }
2822  else {
2823  rowVals[offset] += newVals[j];
2824  }
2825  hint = offset + 1;
2826  numValid++;
2827  }
2828  }
2829  }
2830  // If the graph is neither locally nor globally indexed on the
2831  // calling process, that means the calling process has no graph
2832  // entries. Thus, none of the input column indices are valid.
2833 
2834  return numValid;
2835  }
2836 
2837  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2838  LocalOrdinal
2840  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2841  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2842  const Teuchos::ArrayView<const Scalar>& inputVals,
2843  const bool atomic)
2844  {
2845  typedef LocalOrdinal LO;
2846 
2847  const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
2848  if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2849  return Teuchos::OrdinalTraits<LO>::invalid ();
2850  }
2851  return this->sumIntoGlobalValues (gblRow, numInputEnt,
2852  inputVals.getRawPtr (),
2853  inputGblColInds.getRawPtr (),
2854  atomic);
2855  }
2856 
2857  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2858  LocalOrdinal
2860  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2861  const LocalOrdinal numInputEnt,
2862  const Scalar inputVals[],
2863  const GlobalOrdinal inputGblColInds[],
2864  const bool atomic)
2865  {
2866  typedef impl_scalar_type IST;
2867  typedef LocalOrdinal LO;
2868  typedef GlobalOrdinal GO;
2869 
2870  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2871  // Fill must be active and the "nonconst" graph must exist.
2872  return Teuchos::OrdinalTraits<LO>::invalid ();
2873  }
2874  const crs_graph_type& graph = * (this->staticGraph_);
2875 
2876  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2877  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2878  // mfh 23 Mar 2017, 26 Jul 2017: This branch may not be not
2879  // thread safe in a debug build, in part because it uses
2880  // Teuchos::ArrayView, and in part because of the data structure
2881  // used to stash outgoing entries.
2882  using Teuchos::ArrayView;
2883  ArrayView<const GO> inputGblColInds_av (numInputEnt == 0 ? NULL :
2884  inputGblColInds, numInputEnt);
2885  ArrayView<const Scalar> inputVals_av (numInputEnt == 0 ? NULL :
2886  inputVals, numInputEnt);
2887  // gblRow is not in the row Map on the calling process, so stash
2888  // the given entries away in a separate data structure.
2889  // globalAssemble() (called during fillComplete()) will exchange
2890  // that data and sum it in using sumIntoGlobalValues().
2891  this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2892  inputVals_av);
2893  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2894  // since we won't know whether the given indices were valid
2895  // until globalAssemble (called in fillComplete) is called.
2896  // That's why insertNonownedGlobalValues doesn't return
2897  // anything. Just for consistency, I'll return the number of
2898  // entries that the user gave us.
2899  return numInputEnt;
2900  }
2901  else { // input row is in the row Map on the calling process
2902  auto curRowVals = this->getRowViewNonConst (rowInfo);
2903  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2904  return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2905  inputGblColInds, inVals,
2906  numInputEnt, atomic);
2907  }
2908  }
2909 
2910  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2911  LocalOrdinal
2913  transformLocalValues (const LocalOrdinal lclRow,
2914  const LocalOrdinal numInputEnt,
2915  const impl_scalar_type inputVals[],
2916  const LocalOrdinal inputCols[],
2917  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2918  const bool atomic) const
2919  {
2920  using Tpetra::Details::OrdinalTraits;
2921  typedef LocalOrdinal LO;
2922 
2923  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2924  // Fill must be active and the "nonconst" graph must exist.
2925  return Teuchos::OrdinalTraits<LO>::invalid ();
2926  }
2927  const crs_graph_type& graph = * (this->staticGraph_);
2928  const RowInfo rowInfo = graph.getRowInfo (lclRow);
2929 
2930  if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2931  // The calling process does not own this row, so it is not
2932  // allowed to modify its values.
2933  return static_cast<LO> (0);
2934  }
2935  auto curRowVals = this->getRowViewNonConst (rowInfo);
2936  return this->transformLocalValues (curRowVals.data (), graph,
2937  rowInfo, inputCols, inputVals,
2938  numInputEnt, f, atomic);
2939  }
2940 
2941  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2942  LocalOrdinal
2943  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2944  transformGlobalValues (const GlobalOrdinal gblRow,
2945  const LocalOrdinal numInputEnt,
2946  const impl_scalar_type inputVals[],
2947  const GlobalOrdinal inputCols[],
2948  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2949  const bool atomic) const
2950  {
2951  using Tpetra::Details::OrdinalTraits;
2952  typedef LocalOrdinal LO;
2953 
2954  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2955  // Fill must be active and the "nonconst" graph must exist.
2956  return OrdinalTraits<LO>::invalid ();
2957  }
2958  const crs_graph_type& graph = * (this->staticGraph_);
2959  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2960 
2961  if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2962  // The calling process does not own this row, so it is not
2963  // allowed to modify its values.
2964  return static_cast<LO> (0);
2965  }
2966  auto curRowVals = this->getRowViewNonConst (rowInfo);
2967  return this->transformGlobalValues (curRowVals.data (), graph,
2968  rowInfo, inputCols, inputVals,
2969  numInputEnt, f, atomic);
2970  }
2971 
2972  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2973  LocalOrdinal
2974  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2975  transformLocalValues (impl_scalar_type rowVals[],
2976  const crs_graph_type& graph,
2977  const RowInfo& rowInfo,
2978  const LocalOrdinal inds[],
2979  const impl_scalar_type newVals[],
2980  const LocalOrdinal numElts,
2981  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2982  const bool atomic) const
2983  {
2984  typedef impl_scalar_type ST;
2985  typedef LocalOrdinal LO;
2986  typedef GlobalOrdinal GO;
2987 
2988  if (graph.getProfileType() == StaticProfile)
2989  {
2990  auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset)
2991  {
2992  if (atomic) {
2993  // NOTE (mfh 30 Nov 2015) The commented-out code is
2994  // wrong because another thread may have changed
2995  // rowVals[offset] between those two lines of code.
2996  volatile ST* const dest = &rowVals[offset];
2997  (void) atomic_binary_function_update (dest, newVals[k], f);
2998  }
2999  else {
3000  // use binary function f
3001  rowVals[offset] = f(rowVals[offset], newVals[k]);
3002  }
3003  };
3004  Teuchos::ArrayView<const LO> indsT(inds, numElts);
3005  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
3006  return graph.findLocalIndices(rowInfo, indsT, cb);
3007  }
3008 
3009  // NOTE (DYNAMICPROFILE REMOVAL) (tjf Mar 2019)
3010  // from this point down can be yanked once DynamicProfile is removed.
3011 
3012  //if (newVals.extent (0) != inds.extent (0)) {
3013  // The sizes of the input arrays must match.
3014  //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
3015  //}
3016  //const LO numElts = static_cast<LO> (inds.extent (0));
3017  const bool sorted = graph.isSorted ();
3018 
3019  LO numValid = 0; // number of valid input column indices
3020  size_t hint = 0; // Guess for the current index k into rowVals
3021 
3022  if (graph.isLocallyIndexed ()) {
3023  // Get a view of the column indices in the row. This amortizes
3024  // the cost of getting the view over all the entries of inds.
3025  auto colInds = graph.getLocalKokkosRowView (rowInfo);
3026 
3027  for (LO j = 0; j < numElts; ++j) {
3028  const LO lclColInd = inds[j];
3029  const size_t offset =
3030  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3031  lclColInd, hint, sorted);
3032  if (offset != rowInfo.numEntries) {
3033  if (atomic) {
3034  // NOTE (mfh 30 Nov 2015) The commented-out code is
3035  // wrong because another thread may have changed
3036  // rowVals[offset] between those two lines of code.
3037  //
3038  //const ST newVal = f (rowVals[offset], newVals[j]);
3039  //Kokkos::atomic_assign (&rowVals[offset], newVal);
3040 
3041  volatile ST* const dest = &rowVals[offset];
3042  (void) atomic_binary_function_update (dest, newVals[j], f);
3043  }
3044  else {
3045  // use binary function f
3046  rowVals[offset] = f (rowVals[offset], newVals[j]);
3047  }
3048  hint = offset + 1;
3049  ++numValid;
3050  }
3051  }
3052  }
3053  else if (graph.isGloballyIndexed ()) {
3054  // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
3055  // pointer does NOT change its reference count. Thus, this
3056  // code is still thread safe.
3057  if (graph.colMap_.is_null ()) {
3058  // NO input column indices are valid in this case. Either
3059  // the column Map hasn't been set yet (so local indices
3060  // don't exist yet), or the calling process owns no graph
3061  // entries.
3062  return numValid;
3063  }
3064  const map_type& colMap = * (graph.colMap_);
3065  // Get a view of the column indices in the row. This amortizes
3066  // the cost of getting the view over all the entries of inds.
3067  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
3068 
3069  const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
3070  for (LO j = 0; j < numElts; ++j) {
3071  const GO gblColInd = colMap.getGlobalElement (inds[j]);
3072  if (gblColInd != GINV) {
3073  const size_t offset =
3074  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3075  gblColInd, hint, sorted);
3076  if (offset != rowInfo.numEntries) {
3077  if (atomic) {
3078  // NOTE (mfh 30 Nov 2015) The commented-out code is
3079  // wrong because another thread may have changed
3080  // rowVals[offset] between those two lines of code.
3081  //
3082  //const ST newVal = f (rowVals[offset], newVals[j]);
3083  //Kokkos::atomic_assign (&rowVals[offset], newVal);
3084 
3085  volatile ST* const dest = &rowVals[offset];
3086  (void) atomic_binary_function_update (dest, newVals[j], f);
3087  }
3088  else {
3089  // use binary function f
3090  rowVals[offset] = f (rowVals[offset], newVals[j]);
3091  }
3092  hint = offset + 1;
3093  numValid++;
3094  }
3095  }
3096  }
3097  }
3098  // If the graph is neither locally nor globally indexed on the
3099  // calling process, that means the calling process has no graph
3100  // entries. Thus, none of the input column indices are valid.
3101 
3102  return numValid;
3103  }
3104 
3105  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3106  LocalOrdinal
3107  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3108  transformGlobalValues (impl_scalar_type rowVals[],
3109  const crs_graph_type& graph,
3110  const RowInfo& rowInfo,
3111  const GlobalOrdinal inds[],
3112  const impl_scalar_type newVals[],
3113  const LocalOrdinal numElts,
3114  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
3115  const bool atomic) const
3116  {
3117  typedef impl_scalar_type ST;
3118  typedef LocalOrdinal LO;
3119  typedef GlobalOrdinal GO;
3120 
3121  if (graph.getProfileType() == StaticProfile)
3122  {
3123  auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset)
3124  {
3125  if (atomic) {
3126  volatile ST* const dest = &rowVals[offset];
3127  (void) atomic_binary_function_update(dest, newVals[k], f);
3128  }
3129  else {
3130  // use binary function f
3131  rowVals[offset] = f (rowVals[offset], newVals[k]);
3132  }
3133  };
3134  Teuchos::ArrayView<const GO> indsT(inds, numElts);
3135  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
3136  return graph.findGlobalIndices(rowInfo, indsT, cb);
3137  }
3138 
3139  // NOTE (DYNAMICPROFILE REMOVAL) (tjf Mar 2019)
3140  // from this point down can be yanked once DynamicProfile is removed.
3141 
3142  //if (newVals.extent (0) != inds.extent (0)) {
3143  // The sizes of the input arrays must match.
3144  //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
3145  //}
3146  //const LO numElts = static_cast<LO> (inds.extent (0));
3147  const bool sorted = graph.isSorted ();
3148 
3149  LO numValid = 0; // number of valid input column indices
3150  size_t hint = 0; // Guess for the current index k into rowVals
3151 
3152  if (graph.isGloballyIndexed ()) {
3153  // Get a view of the column indices in the row. This amortizes
3154  // the cost of getting the view over all the entries of inds.
3155  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
3156 
3157  for (LO j = 0; j < numElts; ++j) {
3158  const GO gblColInd = inds[j];
3159  const size_t offset =
3160  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3161  gblColInd, hint, sorted);
3162  if (offset != rowInfo.numEntries) {
3163  if (atomic) {
3164  // NOTE (mfh 30 Nov 2015) The commented-out code is
3165  // wrong because another thread may have changed
3166  // rowVals[offset] between those two lines of code.
3167  //
3168  //const ST newVal = f (rowVals[offset], newVals[j]);
3169  //Kokkos::atomic_assign (&rowVals[offset], newVal);
3170 
3171  volatile ST* const dest = &rowVals[offset];
3172  (void) atomic_binary_function_update (dest, newVals[j], f);
3173  }
3174  else {
3175  // use binary function f
3176  rowVals[offset] = f (rowVals[offset], newVals[j]);
3177  }
3178  hint = offset + 1;
3179  ++numValid;
3180  }
3181  }
3182  }
3183  else if (graph.isLocallyIndexed ()) {
3184  // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
3185  // pointer does NOT change its reference count. Thus, this
3186  // code is still thread safe.
3187  if (graph.colMap_.is_null ()) {
3188  // NO input column indices are valid in this case. Either the
3189  // column Map hasn't been set yet (so local indices don't
3190  // exist yet), or the calling process owns no graph entries.
3191  return numValid;
3192  }
3193  const map_type& colMap = * (graph.colMap_);
3194  // Get a view of the column indices in the row. This amortizes
3195  // the cost of getting the view over all the entries of inds.
3196  auto colInds = graph.getLocalKokkosRowView (rowInfo);
3197 
3198  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
3199  for (LO j = 0; j < numElts; ++j) {
3200  const LO lclColInd = colMap.getLocalElement (inds[j]);
3201  if (lclColInd != LINV) {
3202  const size_t offset =
3203  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3204  lclColInd, hint, sorted);
3205  if (offset != rowInfo.numEntries) {
3206  if (atomic) {
3207  // NOTE (mfh 30 Nov 2015) The commented-out code is
3208  // wrong because another thread may have changed
3209  // rowVals[offset] between those two lines of code.
3210  //
3211  //const ST newVal = f (rowVals[offset], newVals[j]);
3212  //Kokkos::atomic_assign (&rowVals[offset], newVal);
3213 
3214  volatile ST* const dest = &rowVals[offset];
3215  (void) atomic_binary_function_update (dest, newVals[j], f);
3216  }
3217  else {
3218  // use binary function f
3219  rowVals[offset] = f (rowVals[offset], newVals[j]);
3220  }
3221  hint = offset + 1;
3222  numValid++;
3223  }
3224  }
3225  }
3226  }
3227  // If the graph is neither locally nor globally indexed on the
3228  // calling process, that means the calling process has no graph
3229  // entries. Thus, none of the input column indices are valid.
3230 
3231  return numValid;
3232  }
3233 
3234  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3235  LocalOrdinal
3236  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3237  sumIntoLocalValuesImpl (impl_scalar_type rowVals[],
3238  const crs_graph_type& graph,
3239  const RowInfo& rowInfo,
3240  const LocalOrdinal inds[],
3241  const impl_scalar_type newVals[],
3242  const LocalOrdinal numElts,
3243  const bool atomic) const
3244  {
3245  if (graph.getProfileType() == StaticProfile)
3246  {
3247  Teuchos::ArrayView<const LocalOrdinal> indsT(inds, numElts);
3248  auto fun =
3249  [&](size_t const k, size_t const /*start*/, size_t const offset) {
3250  if (atomic)
3251  Kokkos::atomic_add(&rowVals[offset], newVals[k]);
3252  else
3253  rowVals[offset] += newVals[k];
3254  };
3255  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
3256  return graph.findLocalIndices(rowInfo, indsT, cb);
3257  }
3258 
3259  // NOTE (DYNAMICPROFILE REMOVAL) (tjf Mar 2019)
3260  // from this point down can be yanked once DynamicProfile is removed.
3261 
3262  typedef LocalOrdinal LO;
3263  typedef GlobalOrdinal GO;
3264 
3265  const bool sorted = graph.isSorted ();
3266 
3267  size_t hint = 0; // Guess for the current index k into rowVals
3268  LO numValid = 0; // number of valid local column indices
3269 
3270  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
3271  // accurately, it assumes that the host execution space can
3272  // access data in both InputMemorySpace and ValsMemorySpace.
3273 
3274  if (graph.isLocallyIndexed ()) {
3275  // Get a view of the column indices in the row. This amortizes
3276  // the cost of getting the view over all the entries of inds.
3277  auto colInds = graph.getLocalKokkosRowView (rowInfo);
3278 
3279  for (LO j = 0; j < numElts; ++j) {
3280  const LO lclColInd = inds[j];
3281  const size_t offset =
3282  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3283  lclColInd, hint, sorted);
3284  if (offset != rowInfo.numEntries) {
3285  if (atomic) {
3286  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3287  }
3288  else {
3289  rowVals[offset] += newVals[j];
3290  }
3291  hint = offset + 1;
3292  ++numValid;
3293  }
3294  }
3295  }
3296  else if (graph.isGloballyIndexed ()) {
3297  if (graph.colMap_.is_null ()) {
3298  return Teuchos::OrdinalTraits<LO>::invalid ();
3299  }
3300  const map_type colMap = * (graph.colMap_);
3301 
3302  // Get a view of the column indices in the row. This amortizes
3303  // the cost of getting the view over all the entries of inds.
3304  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
3305 
3306  for (LO j = 0; j < numElts; ++j) {
3307  const GO gblColInd = colMap.getGlobalElement (inds[j]);
3308  if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3309  const size_t offset =
3310  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3311  gblColInd, hint, sorted);
3312  if (offset != rowInfo.numEntries) {
3313  if (atomic) {
3314  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3315  }
3316  else {
3317  rowVals[offset] += newVals[j];
3318  }
3319  hint = offset + 1;
3320  ++numValid;
3321  }
3322  }
3323  }
3324  }
3325  // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
3326  // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
3327  // to be neither locally nor globally indexed on a process.
3328  // This means that the graph or matrix has no entries on that
3329  // process. Epetra also works like this. It's related to lazy
3330  // allocation (on first insertion, not at graph / matrix
3331  // construction). Lazy allocation will go away because it is
3332  // not thread scalable.
3333 
3334  return numValid;
3335  }
3336 
3337  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3338  LocalOrdinal
3340  sumIntoLocalValues (const LocalOrdinal localRow,
3341  const Teuchos::ArrayView<const LocalOrdinal>& indices,
3342  const Teuchos::ArrayView<const Scalar>& values,
3343  const bool atomic) const
3344  {
3345  typedef LocalOrdinal LO;
3346 
3347  const LO numInputEnt = static_cast<LO> (indices.size ());
3348  if (static_cast<LO> (values.size ()) != numInputEnt) {
3349  return Teuchos::OrdinalTraits<LO>::invalid ();
3350  }
3351  const LO* const inputInds = indices.getRawPtr ();
3352  const Scalar* const inputVals = values.getRawPtr ();
3353  return this->sumIntoLocalValues (localRow, numInputEnt,
3354  inputVals, inputInds, atomic);
3355  }
3356 
3357  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3358  LocalOrdinal
3360  sumIntoLocalValues (const LocalOrdinal localRow,
3361  const LocalOrdinal numEnt,
3362  const Scalar vals[],
3363  const LocalOrdinal cols[],
3364  const bool atomic) const
3365  {
3366  typedef impl_scalar_type IST;
3367  typedef LocalOrdinal LO;
3368 
3369  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3370  // Fill must be active and the "nonconst" graph must exist.
3371  return Teuchos::OrdinalTraits<LO>::invalid ();
3372  }
3373  const crs_graph_type& graph = * (this->staticGraph_);
3374  const RowInfo rowInfo = graph.getRowInfo (localRow);
3375 
3376  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3377  // The calling process does not own this row, so it is not
3378  // allowed to modify its values.
3379  return static_cast<LO> (0);
3380  }
3381  auto curRowVals = this->getRowViewNonConst (rowInfo);
3382  const IST* const inputVals = reinterpret_cast<const IST*> (vals);
3383  return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3384  cols, inputVals, numEnt, atomic);
3385  }
3386 
3387  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3388  Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type>
3390  getView (RowInfo rowinfo) const
3391  {
3392  using Kokkos::MemoryUnmanaged;
3393  using Kokkos::View;
3394  using Teuchos::ArrayView;
3395  typedef impl_scalar_type ST;
3396  typedef std::pair<size_t, size_t> range_type;
3397 
3398  if (k_values1D_.extent (0) != 0 && rowinfo.allocSize > 0) {
3399 #ifdef HAVE_TPETRA_DEBUG
3400  TEUCHOS_TEST_FOR_EXCEPTION(
3401  rowinfo.offset1D + rowinfo.allocSize > k_values1D_.extent (0),
3402  std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
3403  "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
3404  rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
3405  ") > k_values1D_.extent(0) (" << k_values1D_.extent (0) << ").");
3406 #endif // HAVE_TPETRA_DEBUG
3407  range_type range (rowinfo.offset1D, rowinfo.offset1D + rowinfo.allocSize);
3408  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
3409  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3410  // directly, because that first creates a _managed_ subview,
3411  // then returns an unmanaged version of that. That touches the
3412  // reference count, which costs performance in a measurable way.
3413  // Instead, we create a temporary unmanaged view, then create
3414  // the subview from that.
3415  subview_type sv = Kokkos::subview (subview_type (k_values1D_), range);
3416  const ST* const sv_raw = (rowinfo.allocSize == 0) ? NULL : sv.data ();
3417  return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
3418  }
3419  else if (values2D_ != Teuchos::null) {
3420  return values2D_[rowinfo.localRow] ();
3421  }
3422  else {
3423  return ArrayView<impl_scalar_type> ();
3424  }
3425  }
3426 
3427 
3428  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3429  LocalOrdinal
3432  LocalOrdinal& numEnt,
3433  const RowInfo& rowinfo) const
3434  {
3435  if (k_values1D_.extent (0) != 0 && rowinfo.allocSize > 0) {
3436 #ifdef HAVE_TPETRA_DEBUG
3437  if (rowinfo.offset1D + rowinfo.allocSize > k_values1D_.extent (0)) {
3438  vals = NULL;
3439  numEnt = 0;
3440  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
3441  }
3442 #endif // HAVE_TPETRA_DEBUG
3443  vals = k_values1D_.data () + rowinfo.offset1D;
3444  numEnt = rowinfo.allocSize;
3445  }
3446  else if (! values2D_.is_null ()) {
3447 #ifdef HAVE_TPETRA_DEBUG
3448  if (rowinfo.localRow >= static_cast<size_t> (values2D_.size ())) {
3449  vals = NULL;
3450  numEnt = 0;
3451  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
3452  }
3453 #endif // HAVE_TPETRA_DEBUG
3454  // Use const reference so that we don't update ArrayRCP's
3455  // reference count, which is not thread safe.
3456  const auto& curRow = values2D_[rowinfo.localRow];
3457  vals = curRow.getRawPtr ();
3458  numEnt = curRow.size ();
3459  }
3460  else {
3461  vals = NULL;
3462  numEnt = 0;
3463  }
3464 
3465  return static_cast<LocalOrdinal> (0);
3466  }
3467 
3468  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3469  LocalOrdinal
3472  LocalOrdinal& numEnt,
3473  const RowInfo& rowinfo) const
3474  {
3475  const impl_scalar_type* valsConst;
3476  const LocalOrdinal err = this->getViewRawConst (valsConst, numEnt, rowinfo);
3477  vals = const_cast<impl_scalar_type*> (valsConst);
3478  return err;
3479  }
3480 
3481  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3482  Kokkos::View<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type*,
3484  Kokkos::MemoryUnmanaged>
3486  getRowView (const RowInfo& rowInfo) const
3487  {
3488  using Kokkos::MemoryUnmanaged;
3489  using Kokkos::View;
3490  typedef impl_scalar_type ST;
3491  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
3492  typedef std::pair<size_t, size_t> range_type;
3493 
3494  if (k_values1D_.extent (0) != 0 && rowInfo.allocSize > 0) {
3495 #ifdef HAVE_TPETRA_DEBUG
3496  TEUCHOS_TEST_FOR_EXCEPTION
3497  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.extent (0),
3498  std::range_error, "Tpetra::CrsMatrix::getRowView: Invalid access "
3499  "to 1-D storage of values. rowInfo.offset1D ("
3500  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
3501  << ") > this->k_values1D_.extent(0) ("
3502  << this->k_values1D_.extent (0) << ").");
3503 #endif // HAVE_TPETRA_DEBUG
3504  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
3505  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3506  // directly, because that first creates a _managed_ subview,
3507  // then returns an unmanaged version of that. That touches the
3508  // reference count, which costs performance in a measurable way.
3509  // Instead, we create a temporary unmanaged view, then create
3510  // the subview from that.
3511  return Kokkos::subview (subview_type (this->k_values1D_), range);
3512  }
3513  else if (this->values2D_ != Teuchos::null) {
3514  // Use a reference, so that I don't touch the Teuchos::ArrayView
3515  // reference count in a debug build. (It has no reference count
3516  // in a release build.) This ensures thread safety.
3517  auto& rowView = this->values2D_[rowInfo.localRow];
3518  return subview_type (rowView.getRawPtr (), rowView.size ());
3519  }
3520  else {
3521  return subview_type ();
3522  }
3523  }
3524 
3525  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3526  Kokkos::View<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type*,
3527  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::execution_space,
3528  Kokkos::MemoryUnmanaged>
3529  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3530  getRowViewNonConst (const RowInfo& rowInfo) const
3531  {
3532  using Kokkos::MemoryUnmanaged;
3533  using Kokkos::View;
3534  typedef impl_scalar_type ST;
3535  typedef View<ST*, execution_space, MemoryUnmanaged> subview_type;
3536  typedef std::pair<size_t, size_t> range_type;
3537 
3538  if (k_values1D_.extent (0) != 0 && rowInfo.allocSize > 0) {
3539 #ifdef HAVE_TPETRA_DEBUG
3540  TEUCHOS_TEST_FOR_EXCEPTION
3541  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.extent (0),
3542  std::range_error, "Tpetra::CrsMatrix::getRowViewNonConst: Invalid "
3543  "access to 1-D storage of values. rowInfo.offset1D ("
3544  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
3545  << ") > this->k_values1D_.extent(0) ("
3546  << this->k_values1D_.extent (0) << ").");
3547 #endif // HAVE_TPETRA_DEBUG
3548  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
3549  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3550  // directly, because that first creates a _managed_ subview,
3551  // then returns an unmanaged version of that. That touches the
3552  // reference count, which costs performance in a measurable way.
3553  // Instead, we create a temporary unmanaged view, then create
3554  // the subview from that.
3555  return Kokkos::subview (subview_type (this->k_values1D_), range);
3556  }
3557  else if (this->values2D_ != Teuchos::null) {
3558  // Use a reference, so that I don't touch the Teuchos::ArrayView
3559  // reference count in a debug build. (It has no reference count
3560  // in a release build.) This ensures thread safety.
3561  auto& rowView = this->values2D_[rowInfo.localRow];
3562  return subview_type (rowView.getRawPtr (), rowView.size ());
3563  }
3564  else {
3565  return subview_type ();
3566  }
3567  }
3568 
3569  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3570  Teuchos::ArrayView<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type>
3572  getViewNonConst (const RowInfo& rowinfo) const
3573  {
3574  return Teuchos::av_const_cast<impl_scalar_type> (this->getView (rowinfo));
3575  }
3576 
3577  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3578  void
3580  getLocalRowCopy (LocalOrdinal localRow,
3581  const Teuchos::ArrayView<LocalOrdinal>& indices,
3582  const Teuchos::ArrayView<Scalar>& values,
3583  size_t& numEntries) const
3584  {
3585  using Teuchos::ArrayView;
3586  using Teuchos::av_reinterpret_cast;
3587  const char tfecfFuncName[] = "getLocalRowCopy: ";
3588 
3589  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3590  (! this->hasColMap (), std::runtime_error,
3591  "The matrix does not have a column Map yet. This means we don't have "
3592  "local indices for columns yet, so it doesn't make sense to call this "
3593  "method. If the matrix doesn't have a column Map yet, you should call "
3594  "fillComplete on it first.");
3595 
3596  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3597  const size_t theNumEntries = rowinfo.numEntries;
3598  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3599  (static_cast<size_t> (indices.size ()) < theNumEntries ||
3600  static_cast<size_t> (values.size ()) < theNumEntries,
3601  std::runtime_error, "Row with local index " << localRow << " has " <<
3602  theNumEntries << " entry/ies, but indices.size() = " <<
3603  indices.size () << " and values.size() = " << values.size () << ".");
3604  numEntries = theNumEntries; // first side effect
3605 
3606  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3607  if (staticGraph_->isLocallyIndexed ()) {
3608  const LocalOrdinal* curLclInds;
3609  const impl_scalar_type* curVals;
3610  LocalOrdinal numSpots; // includes both current entries and extra space
3611 
3612  // If we got this far, rowinfo should be correct and should
3613  // refer to a valid local row. Thus, these error checks are
3614  // superfluous, but we retain them in a debug build.
3615 #ifdef HAVE_TPETRA_DEBUG
3616  int err =
3617  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3618  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3619  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3620  "staticGraph_->getLocalViewRawConst returned nonzero error code "
3621  << err << ".");
3622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3623  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3624  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3625  << ".");
3626  const LocalOrdinal numSpotsBefore = numSpots;
3627  err = getViewRawConst (curVals, numSpots, rowinfo);
3628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3629  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3630  "getViewRaw returned nonzero error code " << err << ".");
3631  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3632  (numSpotsBefore != numSpots, std::logic_error,
3633  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3634  << numSpots << ".");
3635 #else
3636  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3637  (void) getViewRawConst (curVals, numSpots, rowinfo);
3638 #endif // HAVE_TPETRA_DEBUG
3639 
3640  for (size_t j = 0; j < theNumEntries; ++j) {
3641  values[j] = curVals[j];
3642  indices[j] = curLclInds[j];
3643  }
3644  }
3645  else if (staticGraph_->isGloballyIndexed ()) {
3646  // Don't call getColMap(), because it touches RCP's reference count.
3647  const map_type& colMap = * (staticGraph_->colMap_);
3648  const GlobalOrdinal* curGblInds;
3649  const impl_scalar_type* curVals;
3650  LocalOrdinal numSpots; // includes both current entries and extra space
3651 
3652  // If we got this far, rowinfo should be correct and should
3653  // refer to a valid local row. Thus, these error checks are
3654  // superfluous, but we retain them in a debug build.
3655 #ifdef HAVE_TPETRA_DEBUG
3656  int err =
3657  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3658  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3659  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3660  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
3661  << err << ".");
3662  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3663  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3664  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3665  << ".");
3666  const LocalOrdinal numSpotsBefore = numSpots;
3667  err = getViewRawConst (curVals, numSpots, rowinfo);
3668  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3669  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3670  "getViewRawConst returned nonzero error code " << err << ".");
3671  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3672  (numSpotsBefore != numSpots, std::logic_error,
3673  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3674  << numSpots << ".");
3675 #else
3676  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3677  (void) getViewRawConst (curVals, numSpots, rowinfo);
3678 #endif //HAVE_TPETRA_DEBUG
3679 
3680  for (size_t j = 0; j < theNumEntries; ++j) {
3681  values[j] = curVals[j];
3682  indices[j] = colMap.getLocalElement (curGblInds[j]);
3683  }
3684  }
3685  }
3686  }
3687 
3688  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3689  void
3691  getGlobalRowCopy (GlobalOrdinal globalRow,
3692  const Teuchos::ArrayView<GlobalOrdinal>& indices,
3693  const Teuchos::ArrayView<Scalar>& values,
3694  size_t& numEntries) const
3695  {
3696  using Teuchos::ArrayView;
3697  using Teuchos::av_reinterpret_cast;
3698  const char tfecfFuncName[] = "getGlobalRowCopy: ";
3699 
3700  const RowInfo rowinfo =
3701  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3702  const size_t theNumEntries = rowinfo.numEntries;
3703  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3704  static_cast<size_t> (indices.size ()) < theNumEntries ||
3705  static_cast<size_t> (values.size ()) < theNumEntries,
3706  std::runtime_error, "Row with global index " << globalRow << " has "
3707  << theNumEntries << " entry/ies, but indices.size() = " <<
3708  indices.size () << " and values.size() = " << values.size () << ".");
3709  numEntries = theNumEntries; // first side effect
3710 
3711  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3712  if (staticGraph_->isLocallyIndexed ()) {
3713  const map_type& colMap = * (staticGraph_->colMap_);
3714  const LocalOrdinal* curLclInds;
3715  const impl_scalar_type* curVals;
3716  LocalOrdinal numSpots; // includes both current entries and extra space
3717 
3718  // If we got this far, rowinfo should be correct and should
3719  // refer to a valid local row. Thus, these error checks are
3720  // superfluous, but we retain them in a debug build.
3721 #ifdef HAVE_TPETRA_DEBUG
3722  int err =
3723  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3724  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3725  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3726  "staticGraph_->getLocalViewRawConst returned nonzero error code "
3727  << err << ".");
3728  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3729  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3730  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3731  << ".");
3732  const LocalOrdinal numSpotsBefore = numSpots;
3733  err = getViewRawConst (curVals, numSpots, rowinfo);
3734  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3735  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3736  "getViewRaw returned nonzero error code " << err << ".");
3737  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3738  (numSpotsBefore != numSpots, std::logic_error,
3739  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3740  << numSpots << ".");
3741 #else
3742  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3743  (void) getViewRawConst (curVals, numSpots, rowinfo);
3744 #endif //HAVE_TPETRA_DEBUG
3745 
3746  for (size_t j = 0; j < theNumEntries; ++j) {
3747  values[j] = curVals[j];
3748  indices[j] = colMap.getGlobalElement (curLclInds[j]);
3749  }
3750  }
3751  else if (staticGraph_->isGloballyIndexed ()) {
3752  const GlobalOrdinal* curGblInds;
3753  const impl_scalar_type* curVals;
3754  LocalOrdinal numSpots; // includes both current entries and extra space
3755 
3756  // If we got this far, rowinfo should be correct and should
3757  // refer to a valid local row. Thus, these error checks are
3758  // superfluous, but we retain them in a debug build.
3759 #ifdef HAVE_TPETRA_DEBUG
3760  int err =
3761  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3762  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3763  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3764  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
3765  << err << ".");
3766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3767  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3768  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3769  << ".");
3770  const LocalOrdinal numSpotsBefore = numSpots;
3771  err = getViewRawConst (curVals, numSpots, rowinfo);
3772  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3773  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3774  "getViewRawConst returned nonzero error code " << err << ".");
3775  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3776  (numSpotsBefore != numSpots, std::logic_error,
3777  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3778  << numSpots << ".");
3779 #else
3780  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3781  (void) getViewRawConst (curVals, numSpots, rowinfo);
3782 #endif //HAVE_TPETRA_DEBUG
3783 
3784  for (size_t j = 0; j < theNumEntries; ++j) {
3785  values[j] = curVals[j];
3786  indices[j] = curGblInds[j];
3787  }
3788  }
3789  }
3790  }
3791 
3792  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3793  void
3795  getLocalRowView (LocalOrdinal localRow,
3796  Teuchos::ArrayView<const LocalOrdinal>& indices,
3797  Teuchos::ArrayView<const Scalar>& values) const
3798  {
3799  using Teuchos::ArrayView;
3800  using Teuchos::av_reinterpret_cast;
3801  typedef LocalOrdinal LO;
3802  const char tfecfFuncName[] = "getLocalRowView: ";
3803 
3804  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3805  isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
3806  "its indices as global indices, so you cannot get a view with local "
3807  "column indices. If the matrix has a column Map, you may call "
3808  "getLocalRowCopy() to get local column indices; otherwise, you may get "
3809  "a view with global column indices by calling getGlobalRowCopy().");
3810  indices = Teuchos::null;
3811  values = Teuchos::null;
3812  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3813  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3814  rowinfo.numEntries > 0) {
3815  ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
3816  ArrayView<const Scalar> valTmp =
3817  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
3818  indices = indTmp (0, rowinfo.numEntries);
3819  values = valTmp (0, rowinfo.numEntries);
3820  }
3821 
3822 #ifdef HAVE_TPETRA_DEBUG
3823  const char suffix[] = ". This should never happen. Please report this "
3824  "bug to the Tpetra developers.";
3825  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3826  (static_cast<size_t> (indices.size ()) !=
3827  static_cast<size_t> (values.size ()), std::logic_error,
3828  "At the end of this method, for local row " << localRow << ", "
3829  "indices.size() = " << indices.size () << " != values.size () = "
3830  << values.size () << suffix);
3831  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3832  (static_cast<size_t> (indices.size ()) !=
3833  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3834  "At the end of this method, for local row " << localRow << ", "
3835  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3836  << rowinfo.numEntries << suffix);
3837  const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3839  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3840  "of this method, for local row " << localRow << ", rowinfo.numEntries = "
3841  << rowinfo.numEntries << " != getNumEntriesInLocalRow(localRow) = " <<
3842  expectedNumEntries << suffix);
3843 #endif // HAVE_TPETRA_DEBUG
3844  }
3845 
// Get a raw-pointer view of one local row: its values and its local
// column indices.
//
// Outputs: numEnt (number of entries in the row), val (pointer to the
// row's values), ind (pointer to the row's local column indices).
//
// Return value:
//  - OrdinalTraits<LO>::invalid() if the matrix has no graph or the graph
//    is globally indexed; the output arguments are not written in that case.
//  - 1 if getRowInfo() reports lclRow as invalid on this process; the
//    outputs are then set to 0 / NULL / NULL.
//  - otherwise, whatever getViewRawConst() returns.
3846  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3847  LocalOrdinal
3849  getLocalRowView (const LocalOrdinal lclRow,
3850  LocalOrdinal& numEnt,
3851  const impl_scalar_type*& val,
3852  const LocalOrdinal*& ind) const
3853  {
3854  typedef LocalOrdinal LO;
3855 
3856  // Don't call getCrsGraph(), because that modifies an RCP reference
3857  // count, which is not thread safe. Checking whether an RCP is
3858  // null does NOT modify its reference count, and is therefore
3859  // thread safe. Note that isGloballyIndexed() calls
3860  // getCrsGraph(), so we have to go to the graph directly.
3861  if (staticGraph_.is_null () || staticGraph_->isGloballyIndexed ()) {
3862  return Tpetra::Details::OrdinalTraits<LO>::invalid ();
3863  }
3864  else {
3865  const RowInfo rowInfo = staticGraph_->getRowInfo (lclRow);
3866  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
3867  numEnt = 0; // no valid entries in this row on the calling process
3868  val = NULL;
3869  ind = NULL;
3870  // First argument (lclRow) invalid, so make 1 the error code.
3871  return static_cast<LO> (1);
3872  }
3873  else {
      // numEnt is set before val/ind; it is also passed on to
      // getViewRawConst() below together with the row info.
3874  numEnt = static_cast<LO> (rowInfo.numEntries);
3875  auto lclColInds = staticGraph_->getLocalKokkosRowView (rowInfo);
3876  ind = lclColInds.data (); // FIXME (mfh 18 Jul 2016) UVM
3877  const LO err = this->getViewRawConst (val, numEnt, rowInfo);
3878  return err;
3879  }
3880  }
3881  }
3882 
// Variant of getLocalRowView() that hands the values back as
// const Scalar* rather than const impl_scalar_type*.
//
// Note the output-argument order here is (lclColInds, vals), whereas
// getLocalRowView() takes (val, ind). The return value is the error code
// from getLocalRowView(), forwarded unchanged.
3883  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3884  LocalOrdinal
3886  getLocalRowViewRaw (const LocalOrdinal lclRow,
3887  LocalOrdinal& numEnt,
3888  const LocalOrdinal*& lclColInds,
3889  const Scalar*& vals) const
3890  {
3891  const impl_scalar_type* vals_ist = NULL;
3892  const LocalOrdinal errCode =
3893  this->getLocalRowView (lclRow, numEnt, vals_ist, lclColInds);
      // NOTE(review): the cast assumes Scalar and impl_scalar_type share
      // the same in-memory representation -- confirm for all Scalar types.
3894  vals = reinterpret_cast<const Scalar*> (vals_ist);
3895  return errCode;
3896  }
3897 
// Get a view of one row, addressed by global row index, with global
// column indices.
//
// Throws std::runtime_error if the matrix is locally indexed. Otherwise
// both output views are first reset to null; they are filled only when
// the row info for globalRow is valid on this process and the row has at
// least one entry. In debug builds (HAVE_TPETRA_DEBUG) several
// consistency checks throw std::logic_error on failure.
//
// NOTE(review): staticGraph_ is dereferenced here without the null check
// that getLocalRowView() performs -- confirm it cannot be null here.
3898  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3899  void
3901  getGlobalRowView (GlobalOrdinal globalRow,
3902  Teuchos::ArrayView<const GlobalOrdinal>& indices,
3903  Teuchos::ArrayView<const Scalar>& values) const
3904  {
3905  using Teuchos::ArrayView;
3906  using Teuchos::av_reinterpret_cast;
3907  typedef GlobalOrdinal GO;
3908  const char tfecfFuncName[] = "getGlobalRowView: ";
3909 
3910  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3911  isLocallyIndexed (), std::runtime_error,
3912  "The matrix is locally indexed, so we cannot return a view of the row "
3913  "with global column indices. Use getGlobalRowCopy() instead.");
      // Default result: empty views (used for rows that are not valid on
      // this process or that have no entries).
3914  indices = Teuchos::null;
3915  values = Teuchos::null;
3916  const RowInfo rowinfo =
3917  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3918  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3919  rowinfo.numEntries > 0) {
3920  ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
3921  ArrayView<const Scalar> valTmp =
3922  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
3923 #ifdef HAVE_TPETRA_DEBUG
3924  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3925  (static_cast<size_t> (indTmp.size ()) < rowinfo.numEntries ||
3926  static_cast<size_t> (valTmp.size ()) < rowinfo.numEntries,
3927  std::logic_error, std::endl << "rowinfo.numEntries not accurate. "
3928  << std::endl << "indTmp.size() = " << indTmp.size ()
3929  << ", valTmp.size() = " << valTmp.size ()
3930  << ", rowinfo.numEntries = " << rowinfo.numEntries << ".");
3931 #endif // HAVE_TPETRA_DEBUG
      // Trim both views to the first numEntries elements; the debug check
      // above only requires the raw views to be at least that long.
3932  indices = indTmp (0, rowinfo.numEntries);
3933  values = valTmp (0, rowinfo.numEntries);
3934  }
3935 
3936 #ifdef HAVE_TPETRA_DEBUG
3937  const char suffix[] = ". This should never happen. Please report this "
3938  "bug to the Tpetra developers.";
3939  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3940  (static_cast<size_t> (indices.size ()) !=
3941  static_cast<size_t> (values.size ()), std::logic_error,
3942  "At the end of this method, for global row " << globalRow << ", "
3943  "indices.size() = " << indices.size () << " != values.size () = "
3944  << values.size () << suffix);
3945  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3946  (static_cast<size_t> (indices.size ()) !=
3947  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3948  "At the end of this method, for global row " << globalRow << ", "
3949  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3950  << rowinfo.numEntries << suffix);
3951  const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3952  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3953  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3954  "of this method, for global row " << globalRow << ", rowinfo.numEntries "
3955  "= " << rowinfo.numEntries << " != getNumEntriesInGlobalRow(globalRow) ="
3956  " " << expectedNumEntries << suffix);
3957 #endif // HAVE_TPETRA_DEBUG
3958  }
3959 
// Multiply stored matrix values by alpha, in place.
//
// Throws std::runtime_error unless fill is active. Does nothing when the
// graph's indices are not allocated, or when this process has no rows or
// no entries. For StaticProfile the values are reached through
// lclMatrix_; otherwise through the per-row arrays in values2D_.
3960  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3961  void
3963  scale (const Scalar& alpha)
3964  {
3965  typedef LocalOrdinal LO;
3966  typedef typename Teuchos::Array<Scalar>::size_type size_type;
3967  const char tfecfFuncName[] = "scale: ";
3968  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3969 
3970  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3971  ! isFillActive (), std::runtime_error,
3972  "Fill must be active before you may call this method. "
3973  "Please call resumeFill() to make fill active.");
3974 
3975  const size_t nlrs = staticGraph_->getNodeNumRows ();
3976  const size_t numEntries = staticGraph_->getNodeNumEntries ();
3977  if (! staticGraph_->indicesAreAllocated () ||
3978  nlrs == 0 || numEntries == 0) {
3979  // do nothing
3980  }
3981  else {
3982  if (staticGraph_->getProfileType () == StaticProfile) {
      // This branch scales row_i.length values per row, i.e. the whole
      // extent of each row as reported by lclMatrix_.row().
3983  const LO lclNumRows = lclMatrix_.numRows ();
3984  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3985  auto row_i = lclMatrix_.row (lclRow);
3986  for (LO k = 0; k < row_i.length; ++k) {
3987  // FIXME (mfh 02 Jan 2015) This assumes CUDA UVM.
3988  row_i.value (k) *= theAlpha;
3989  }
3990  }
3991  }
      // NOTE(review): this condition is the exact negation of the `if`
      // above, so a plain `else` would be equivalent.
3992  else if (staticGraph_->getProfileType () != StaticProfile) {
3993  for (size_t row = 0; row < nlrs; ++row) {
      // Only the first getNumEntriesInLocalRow(row) values of each row
      // array are scaled in this branch.
3994  const size_type numEnt = getNumEntriesInLocalRow (row);
3995  Teuchos::ArrayView<impl_scalar_type> rowVals = values2D_[row] ();
3996  for (size_type k = 0; k < numEnt; ++k) {
3997  rowVals[k] *= theAlpha;
3998  }
3999  }
4000  }
4001  }
4002  }
4003 
// Overwrite stored matrix values with alpha.
//
// Throws std::runtime_error unless fill is active. Does nothing when the
// graph's indices are not allocated or this process has no entries.
// For StaticProfile the whole k_values1D_ allocation is filled; otherwise
// every element of every values2D_ row array is filled.
4004  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4005  void
4007  setAllToScalar (const Scalar& alpha)
4008  {
4009  const char tfecfFuncName[] = "setAllToScalar: ";
4010  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
4011  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4012  ! isFillActive (), std::runtime_error,
4013  "Fill must be active before you may call this method. "
4014  "Please call resumeFill() to make fill active.");
4015 
4016  // Replace all values in the matrix.
4017  // It is easiest to replace all allocated values, instead of replacing only the ones with valid entries.
4018  // However, if there are no valid entries, we can short-circuit.
4019  // Furthermore, if the values aren't allocated, we can short-circuit (no entries have been inserted so far).
4020  const size_t nlrs = staticGraph_->getNodeNumRows();
4021  const size_t numEntries = staticGraph_->getNodeNumEntries();
4022  if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
4023  // do nothing
4024  }
4025  else {
4026  const ProfileType profType = staticGraph_->getProfileType ();
4027  if (profType == StaticProfile) {
4028  // FIXME (mfh 24 Dec 2014) Once CrsMatrix implements DualView
4029  // semantics, this would be the place to mark memory as
4030  // modified.
4031  Kokkos::deep_copy (k_values1D_, theAlpha);
4032  }
      // NOTE(review): profType is a local const, so this condition is
      // always true here; a plain `else` would be equivalent.
4033  else if (profType != StaticProfile) {
4034  for (size_t row = 0; row < nlrs; ++row) {
4035  std::fill (values2D_[row].begin (), values2D_[row].end (), theAlpha);
4036  }
4037  }
4038  }
4039  }
4040 
// Install a complete set of row offsets, column indices, and values
// (given as Kokkos views) as the matrix's local storage.
//
// Throws:
//  - std::invalid_argument if columnIndices and values differ in size;
//  - std::runtime_error if myGraph_ is null, or if
//    myGraph_->setAllIndices() throws (its message is forwarded);
//  - std::logic_error if the graph's local graph afterwards does not have
//    the same dimensions as the inputs.
//
// On success, lclMatrix_ is rebuilt from `values` and the graph's local
// graph, k_values1D_ is set to lclMatrix_.values, storageStatus_ becomes
// STORAGE_1D_PACKED, and checkInternalState() is called.
4041  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4042  void
4044  setAllValues (const typename local_matrix_type::row_map_type& rowPointers,
4045  const typename local_graph_type::entries_type::non_const_type& columnIndices,
4046  const typename local_matrix_type::values_type& values)
4047  {
4048  const char tfecfFuncName[] = "setAllValues: ";
4049  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4050  (columnIndices.size () != values.size (), std::invalid_argument,
4051  "columnIndices.size() = " << columnIndices.size () << " != values.size()"
4052  " = " << values.size () << ".");
4053  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4054  (myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
4055 
      // Any std::exception from the graph is rethrown as std::runtime_error
      // with this method's name prefixed by the exception macro.
4056  try {
4057  myGraph_->setAllIndices (rowPointers, columnIndices);
4058  }
4059  catch (std::exception &e) {
4060  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4061  (true, std::runtime_error, "myGraph_->setAllIndices() threw an "
4062  "exception: " << e.what ());
4063  }
4064  // Make sure that myGraph_ now has a local graph. It may not be
4065  // fillComplete yet, so it's important to check. We don't care
4066  // whether setAllIndices() did a shallow copy or a deep copy, so a
4067  // good way to check is to compare dimensions.
4068  auto lclGraph = myGraph_->getLocalGraph ();
4069  const size_t numEnt = lclGraph.entries.extent (0);
4070  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4071  (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
4072  numEnt != static_cast<size_t> (columnIndices.extent (0)),
4073  std::logic_error, "myGraph_->setAllIndices() did not correctly create "
4074  "local graph. Please report this bug to the Tpetra developers.");
4075 
      // NOTE(review): getColMap() is dereferenced without a null check --
      // presumably the graph must already have a column Map; confirm.
4076  const size_t numCols = myGraph_->getColMap ()->getNodeNumElements ();
4077  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
4078  numCols, values, lclGraph);
4079  // FIXME (22 Jun 2016) I would very much like to get rid of
4080  // k_values1D_ at some point. I find it confusing to have all
4081  // these extra references lying around.
4082  this->k_values1D_ = this->lclMatrix_.values;
4083 
4084  // Storage MUST be packed, since the interface doesn't give any
4085  // way to indicate any extra space at the end of each row.
4086  this->storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
4087 
4088  checkInternalState ();
4089  }
4090 
// ArrayRCP overload of setAllValues().
//
// Copies the row offsets (ptr) into the matrix's native row-offset view
// via copyOffsets(), makes deep copies of the column indices (ind) and
// values (val) as Kokkos views, and forwards all three to the
// Kokkos-view overload of setAllValues(). Throws std::logic_error if the
// copied offsets view does not have the same length as ptr.
4091  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4092  void
4094  setAllValues (const Teuchos::ArrayRCP<size_t>& ptr,
4095  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
4096  const Teuchos::ArrayRCP<Scalar>& val)
4097  {
4098  using Kokkos::Compat::getKokkosViewDeepCopy;
4099  using Teuchos::ArrayRCP;
4100  using Teuchos::av_reinterpret_cast;
4101  typedef device_type DT;
4102  typedef impl_scalar_type IST;
4103  typedef typename local_matrix_type::row_map_type row_map_type;
4104  //typedef typename row_map_type::non_const_value_type row_offset_type;
4105  const char tfecfFuncName[] = "setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
4106 
4107  // The row offset type may depend on the execution space. It may
4108  // not necessarily be size_t. If it's not, we need to make a deep
4109  // copy. We need to make a deep copy anyway so that Kokkos can
4110  // own the memory. Regardless, ptrNative gets the copy.
4111  typename row_map_type::non_const_type ptrNative ("ptr", ptr.size ());
      // Unmanaged host view wrapping the caller's ArrayRCP storage; it is
      // only used as the source of the copy below.
4112  Kokkos::View<const size_t*,
4113  typename row_map_type::array_layout,
4114  Kokkos::HostSpace,
4115  Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
4116  ::Tpetra::Details::copyOffsets (ptrNative, ptrSizeT);
4117 
4118  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4119  (ptrNative.extent (0) != ptrSizeT.extent (0),
4120  std::logic_error, "ptrNative.extent(0) = " <<
4121  ptrNative.extent (0) << " != ptrSizeT.extent(0) = "
4122  << ptrSizeT.extent (0) << ". Please report this bug to the "
4123  "Tpetra developers.");
4124 
4125  auto indIn = getKokkosViewDeepCopy<DT> (ind ());
4126  auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
4127  this->setAllValues (ptrNative, indIn, valIn);
4128  }
4129 
// Fill `offsets` with the graph's local diagonal offsets, one per local
// row.
//
// Throws std::runtime_error if the matrix has no graph. `offsets` is
// grown to the local number of rows if it is shorter; it is never shrunk.
// When the device memory space is Kokkos::HostSpace the graph writes
// straight into the caller's array; otherwise the offsets are computed
// into a temporary device view and deep-copied back.
4130  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4131  void
4133  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
4134  {
4135  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
4136  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4137  (staticGraph_.is_null (), std::runtime_error, "The matrix has no graph.");
4138 
4139  // mfh 11 May 2016: We plan to deprecate the ArrayRCP version of
4140  // this method in CrsGraph too, so don't call it (otherwise build
4141  // warnings will show up and annoy users). Instead, copy results
4142  // in and out, if the memory space requires it.
4143 
4144  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
4145  if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
4146  offsets.resize (lclNumRows);
4147  }
4148 
4149  // The input ArrayRCP must always be a host pointer. Thus, if
4150  // device_type::memory_space is Kokkos::HostSpace, it's OK for us
4151  // to write to that allocation directly as a Kokkos::View.
4152  typedef typename device_type::memory_space memory_space;
      // This is a run-time `if` on a compile-time constant, so both
      // branches are compiled for every device_type.
4153  if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
4154  // It is always syntactically correct to assign a raw host
4155  // pointer to a device View, so this code will compile correctly
4156  // even if this branch never runs.
4157  typedef Kokkos::View<size_t*, device_type,
4158  Kokkos::MemoryUnmanaged> output_type;
4159  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
4160  staticGraph_->getLocalDiagOffsets (offsetsOut);
4161  }
4162  else {
4163  Kokkos::View<size_t*, device_type> offsetsTmp ("diagOffsets", lclNumRows);
4164  staticGraph_->getLocalDiagOffsets (offsetsTmp);
4165  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
4166  Kokkos::MemoryUnmanaged> output_type;
4167  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
4168  Kokkos::deep_copy (offsetsOut, offsetsTmp);
4169  }
4170  }
4171 
// One-argument getLocalDiagCopy (see tfecfFuncName below): writes the
// matrix's diagonal into the Vector `diag`, without precomputed offsets.
//
// NOTE(review): the declarator lines of this definition, and listing
// lines 4222 and 4227, are absent from this extract; `diag` and the two
// helper functions called at the end are therefore not declared in the
// visible text. Presumably the missing body lines are using-declarations
// for those helpers -- verify against the original file.
//
// Throws std::runtime_error if the matrix has no graph or no column Map.
// Returns without doing anything if the row Map or its communicator is
// null. In debug builds it also requires diag's Map to be compatible with
// the row Map.
4172  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4173  void
4176  {
4177  using Teuchos::ArrayRCP;
4178  using Teuchos::ArrayView;
4179  using Teuchos::av_reinterpret_cast;
4180  const char tfecfFuncName[] = "getLocalDiagCopy (1-arg): ";
4181  typedef local_ordinal_type LO;
4182 
4183 
4184  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4185  staticGraph_.is_null (), std::runtime_error,
4186  "This method requires that the matrix have a graph.");
4187  auto rowMapPtr = this->getRowMap ();
4188  if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
4189  // Processes on which the row Map or its communicator is null
4190  // don't participate. Users shouldn't even call this method on
4191  // those processes.
4192  return;
4193  }
4194  auto colMapPtr = this->getColMap ();
4195  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4196  (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
4197  "This method requires that the matrix have a column Map.");
4198  const map_type& rowMap = * rowMapPtr;
4199  const map_type& colMap = * colMapPtr;
4200  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4201 
4202 #ifdef HAVE_TPETRA_DEBUG
4203  // isCompatible() requires an all-reduce, and thus this check
4204  // should only be done in debug mode.
4205  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4206  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
4207  "The input Vector's Map must be compatible with the CrsMatrix's row "
4208  "Map. You may check this by using Map's isCompatible method: "
4209  "diag.getMap ()->isCompatible (A.getRowMap ());");
4210 #endif // HAVE_TPETRA_DEBUG
4211 
      // Fill-complete matrices go through the helper that takes the local
      // matrix and local Maps; otherwise a separate helper that takes the
      // whole matrix object is used. Both return values are discarded.
4212  if (this->isFillComplete ()) {
4213  diag.template modify<device_type> ();
4214  const auto D_lcl = diag.template getLocalView<device_type> ();
4215  // 1-D subview of the first (and only) column of D_lcl.
4216  const auto D_lcl_1d =
4217  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
4218 
4219  const auto lclRowMap = rowMap.getLocalMap ();
4220  const auto lclColMap = colMap.getLocalMap ();
4221  const auto lclMatrix = this->lclMatrix_;
4223  (void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
4224  lclColMap, lclMatrix);
4225  }
4226  else {
4228  (void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
4229  }
4230  }
4231 
// getLocalDiagCopy overload taking precomputed diagonal offsets as a
// Kokkos::View: writes the diagonal into `diag` by calling
// KokkosSparse::getDiagCopy on the local matrix.
//
// NOTE(review): the first declarator lines (including the `diag`
// parameter) are absent from this extract. In debug builds diag's Map
// must be compatible with the row Map, else std::runtime_error.
4232  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4233  void
4236  const Kokkos::View<const size_t*, device_type,
4237  Kokkos::MemoryUnmanaged>& offsets) const
4238  {
4239  typedef LocalOrdinal LO;
4240 
4241 #ifdef HAVE_TPETRA_DEBUG
4242  const char tfecfFuncName[] = "getLocalDiagCopy: ";
4243  const map_type& rowMap = * (this->getRowMap ());
4244  // isCompatible() requires an all-reduce, and thus this check
4245  // should only be done in debug mode.
4246  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4247  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
4248  "The input Vector's Map must be compatible with (in the sense of Map::"
4249  "isCompatible) the CrsMatrix's row Map.");
4250 #endif // HAVE_TPETRA_DEBUG
4251 
4252  // For now, we fill the Vector on the host and sync to device.
4253  // Later, we may write a parallel kernel that works entirely on
4254  // device.
4255  //
4256  // NOTE (mfh 21 Jan 2016): The host kernel here assumes UVM. Once
4257  // we write a device kernel, it will not need to assume UVM.
      //
      // NOTE(review): the visible code below marks diag as modified for
      // device_type and hands device_type views to
      // KokkosSparse::getDiagCopy; no host fill or sync appears here. The
      // comment above may be stale -- confirm.
4258 
4259  diag.template modify<device_type> ();
4260  auto D_lcl = diag.template getLocalView<device_type> ();
4261  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4262  // Get 1-D subview of the first (and only) column of D_lcl.
4263  auto D_lcl_1d =
4264  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
4265 
4266  KokkosSparse::getDiagCopy (D_lcl_1d, offsets, this->lclMatrix_);
4267  }
4268 
// getLocalDiagCopy overload taking precomputed diagonal offsets as a
// Teuchos::ArrayView: fills `diag` on the host, then syncs it to device.
//
// For each local row, the output entry is zero when the row's offset
// equals the invalid size_t marker, and otherwise the row's value at that
// offset.
//
// NOTE(review): the first declarator lines (including the `diag`
// parameter) are absent from this extract. In debug builds diag's Map
// must be compatible with the row Map, else std::runtime_error.
4269  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4270  void
4273  const Teuchos::ArrayView<const size_t>& offsets) const
4274  {
4275  using LO = LocalOrdinal;
4276  using host_execution_space = Kokkos::DefaultHostExecutionSpace;
4277  using IST = impl_scalar_type;
4278 
4279 #ifdef HAVE_TPETRA_DEBUG
4280  const char tfecfFuncName[] = "getLocalDiagCopy: ";
4281  const map_type& rowMap = * (this->getRowMap ());
4282  // isCompatible() requires an all-reduce, and thus this check
4283  // should only be done in debug mode.
4284  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4285  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
4286  "The input Vector's Map must be compatible with (in the sense of Map::"
4287  "isCompatible) the CrsMatrix's row Map.");
4288 #endif // HAVE_TPETRA_DEBUG
4289 
4290  // See #1510. In case diag has already been marked modified on
4291  // device, we need to clear that flag, since the code below works
4292  // on host.
4293  diag.clear_sync_state ();
4294 
4295  // For now, we fill the Vector on the host and sync to device.
4296  // Later, we may write a parallel kernel that works entirely on
4297  // device.
4298  diag.modify_host ();
4299  auto lclVecHost = diag.getLocalViewHost ();
4300  // 1-D subview of the first (and only) column of lclVecHost.
4301  auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
4302 
      // Wrap the caller's offsets array in an unmanaged host view (no copy).
4303  using host_offsets_view_type =
4304  Kokkos::View<const size_t*, Kokkos::HostSpace,
4305  Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
4306  host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
4307  // Find the diagonal entries and put them in lclVecHost1d.
4308  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4309  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4310  const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
      // NOTE(review): h_offsets is indexed up to myNumRows - 1 with no
      // check that offsets.size() >= myNumRows -- confirm callers ensure it.
      // The lambda captures by reference and runs on the host execution
      // space; lclMatrix_ is read through the captured `this`.
4311  Kokkos::parallel_for
4312  ("Tpetra::CrsMatrix::getLocalDiagCopy",
4313  range_type (0, myNumRows),
4314  [&] (const LO& lclRow) {
4315  lclVecHost1d(lclRow) = STS::zero (); // default value if no diag entry
4316  if (h_offsets[lclRow] != INV) {
4317  auto curRow = lclMatrix_.rowConst (lclRow);
4318  lclVecHost1d(lclRow) = static_cast<IST> (curRow.value(h_offsets[lclRow]));
4319  }
4320  });
4321  diag.sync_device ();
4322  }
4323 
4324 
// leftScale (see tfecfFuncName below): scale the matrix's rows by the
// entries of the vector x; in the fallback loop, every value in local
// row i is multiplied by the vector's i-th entry.
//
// NOTE(review): the declarator lines (including the `x` parameter) and
// listing line 4338 are absent from this extract; `vec_type` is used
// below but not declared in the visible text -- presumably line 4338
// declares it. Verify against the original file.
//
// x's Map must be the same as the matrix's range Map or row Map,
// otherwise std::invalid_argument is thrown. If it matches the range Map
// and the graph has an exporter, x is first redistributed into a
// temporary row-Map vector.
4325  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4326  void
4329  {
4330  using ::Tpetra::Details::ProfilingRegion;
4331  using Teuchos::ArrayRCP;
4332  using Teuchos::ArrayView;
4333  using Teuchos::null;
4334  using Teuchos::RCP;
4335  using Teuchos::rcp;
4336  using Teuchos::rcpFromRef;
4337  using LO = local_ordinal_type;
4339  const char tfecfFuncName[] = "leftScale: ";
4340 
4341  ProfilingRegion region ("Tpetra::CrsMatrix::leftScale");
4342 
      // xp ends up pointing either at x itself (non-owning, via
      // rcpFromRef) or at a temporary vector on the row Map.
4343  RCP<const vec_type> xp;
4344  if (this->getRangeMap ()->isSameAs (* (x.getMap ()))) {
4345  // Take from Epetra: If we have a non-trivial exporter, we must
4346  // import elements that are permuted or are on other processors.
4347  auto exporter = this->getCrsGraphRef ().getExporter ();
4348  if (exporter.get () != nullptr) {
4349  RCP<vec_type> tempVec (new vec_type (this->getRowMap ()));
4350  tempVec->doImport (x, *exporter, REPLACE); // reverse mode
4351  xp = tempVec;
4352  }
4353  else {
4354  xp = rcpFromRef (x);
4355  }
4356  }
4357  else if (this->getRowMap ()->isSameAs (* (x.getMap ()))) {
4358  xp = rcpFromRef (x);
4359  }
4360  else {
4361  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4362  (true, std::invalid_argument, "x's Map must be the same as "
4363  "either the row Map or the range Map of the CrsMatrix.");
4364  }
4365 
4366  // Check whether A has a valid local matrix. It might not if it
4367  // was not created with a local matrix, and if fillComplete has
4368  // never been called on it before. A never-initialized (and thus
4369  // invalid) local matrix has zero rows, because it was default
4370  // constructed.
4371  const LO lclNumRows =
4372  static_cast<LO> (this->getRowMap ()->getNodeNumElements ());
4373  const bool validLocalMatrix = this->lclMatrix_.numRows () == lclNumRows;
4374 
4375  if (validLocalMatrix) {
4376  using dev_memory_space = typename device_type::memory_space;
      // xp is a pointer-to-const; the const is cast away only to bring the
      // vector's device copy up to date before reading it.
4377  if (xp->template need_sync<dev_memory_space> ()) {
4378  using Teuchos::rcp_const_cast;
4379  rcp_const_cast<vec_type> (xp)->template sync<dev_memory_space> ();
4380  }
4381  auto x_lcl = xp->template getLocalView<dev_memory_space> ();
4382  auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
4383  ::Tpetra::Details::leftScaleLocalCrsMatrix (this->lclMatrix_, x_lcl_1d, false, false);
4384  }
4385  else {
      // Fallback when there is no valid local matrix: scale row by row
      // through the per-row value views.
4386  execution_space::fence (); // for UVM's sake
4387 
4388  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
4389  ArrayView<impl_scalar_type> rowValues = Teuchos::null;
4390  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
4391  const RowInfo rowinfo = this->staticGraph_->getRowInfo (i);
4392  rowValues = this->getViewNonConst (rowinfo);
4393  const impl_scalar_type scaleValue = static_cast<impl_scalar_type> (vectorVals[i]);
4394  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4395  rowValues[j] *= scaleValue;
4396  }
4397  }
4398  execution_space::fence (); // for UVM's sake
4399  }
4400  }
4401 
// rightScale (see tfecfFuncName below): scale the matrix's columns by the
// entries of the vector x; in the fallback loop, each stored value is
// multiplied by the vector entry at that value's local column index.
//
// NOTE(review): the declarator lines (including the `x` parameter) and
// listing line 4415 are absent from this extract; `vec_type` is used
// below but not declared in the visible text -- presumably line 4415
// declares it. Verify against the original file.
//
// x's Map must be the same as the matrix's domain Map or column Map,
// otherwise std::runtime_error is thrown (leftScale throws
// std::invalid_argument in the analogous case). If it matches the domain
// Map and the graph has an importer, x is first imported into a temporary
// column-Map vector.
4402  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4403  void
4406  {
4407  using ::Tpetra::Details::ProfilingRegion;
4408  using Teuchos::ArrayRCP;
4409  using Teuchos::ArrayView;
4410  using Teuchos::null;
4411  using Teuchos::RCP;
4412  using Teuchos::rcp;
4413  using Teuchos::rcpFromRef;
4414  using LO = local_ordinal_type;
4416  const char tfecfFuncName[] = "rightScale: ";
4417 
4418  ProfilingRegion region ("Tpetra::CrsMatrix::rightScale");
4419 
4420  RCP<const vec_type> xp;
4421  if (this->getDomainMap ()->isSameAs (* (x.getMap ()))) {
4422  // Take from Epetra: If we have a non-trivial importer, we must
4423  // import elements that are permuted or are on other processors.
4424  auto importer = this->getCrsGraphRef ().getImporter ();
4425  if (importer.get () != nullptr) {
4426  RCP<vec_type> tempVec (new vec_type (this->getColMap ()));
4427  tempVec->doImport (x, *importer, REPLACE);
4428  xp = tempVec;
4429  }
4430  else {
4431  xp = rcpFromRef (x);
4432  }
4433  }
4434  else if (this->getColMap ()->isSameAs (* (x.getMap ()))) {
4435  xp = rcpFromRef (x);
4436  } else {
4437  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4438  (true, std::runtime_error, "x's Map must be the same as "
4439  "either the domain Map or the column Map of the CrsMatrix.");
4440  }
4441 
4442  // Check whether A has a valid local matrix. It might not if it
4443  // was not created with a local matrix, and if fillComplete has
4444  // never been called on it before. A never-initialized (and thus
4445  // invalid) local matrix has zero rows, because it was default
4446  // constructed.
4447  const LO lclNumRows =
4448  static_cast<LO> (this->getRowMap ()->getNodeNumElements ());
4449  const bool validLocalMatrix = this->lclMatrix_.numRows () == lclNumRows;
4450 
4451  if (validLocalMatrix) {
4452  using dev_memory_space = typename device_type::memory_space;
      // xp is a pointer-to-const; the const is cast away only to bring the
      // vector's device copy up to date before reading it.
4453  if (xp->template need_sync<dev_memory_space> ()) {
4454  using Teuchos::rcp_const_cast;
4455  rcp_const_cast<vec_type> (xp)->template sync<dev_memory_space> ();
4456  }
4457  auto x_lcl = xp->template getLocalView<dev_memory_space> ();
4458  auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
4459  ::Tpetra::Details::rightScaleLocalCrsMatrix (this->lclMatrix_, x_lcl_1d, false, false);
4460  }
4461  else {
      // Fallback when there is no valid local matrix: walk each row's
      // values together with its local column indices.
4462  execution_space::fence (); // for UVM's sake
4463 
4464  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
4465  ArrayView<impl_scalar_type> rowValues = null;
4466  for (LO i = 0; i < lclNumRows; ++i) {
4467  const RowInfo rowinfo = this->staticGraph_->getRowInfo (i);
4468  rowValues = this->getViewNonConst (rowinfo);
4469  ArrayView<const LO> colInds;
4470  this->getCrsGraphRef ().getLocalRowView (i, colInds);
4471  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
4472  rowValues[j] *= static_cast<impl_scalar_type> (vectorVals[colInds[j]]);
4473  }
4474  }
4475  execution_space::fence (); // for UVM's sake
4476  }
4477  }
4478 
// Frobenius norm of the matrix: the square root of the sum, over all
// processes, of |value|^2 for every stored entry.
//
// NOTE(review): the return-type and declarator lines are absent from this
// extract; presumably this is getFrobeniusNorm() const returning mag_type
// -- verify against the original file.
//
// frobNorm_ acts as a cache: the value -STM::one() means "not computed".
// When computing, every process takes part in the all-reduce. The result
// is stored back into frobNorm_ only if the matrix is fill complete.
4479  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4483  {
4484  using Teuchos::ArrayView;
4485  using Teuchos::outArg;
4486  using Teuchos::REDUCE_SUM;
4487  using Teuchos::reduceAll;
4488  typedef typename Teuchos::ArrayRCP<const impl_scalar_type>::size_type size_type;
4489 
4490  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
4491  // local part of this computation. It could make sense to put
4492  // this operation in the Kokkos::CrsMatrix.
4493 
4494  // check the cache first
4495  mag_type frobNorm = frobNorm_;
4496  if (frobNorm == -STM::one ()) {
4497  mag_type mySum = STM::zero ();
4498  if (getNodeNumEntries() > 0) {
4499  if (isStorageOptimized ()) {
4500  // "Optimized" storage is packed storage. That means we can
4501  // iterate in one pass through the 1-D values array.
4502  const size_type numEntries =
4503  static_cast<size_type> (getNodeNumEntries ());
4504  for (size_type k = 0; k < numEntries; ++k) {
4505  // FIXME (mfh 05 Aug 2014) This assumes UVM.
4506  const impl_scalar_type val = k_values1D_(k);
4507  // Note (etp 06 Jan 2015) We need abs() here for composite types
4508  // (in general, if mag_type is on the left-hand-side, we need
4509  // abs() on the right-hand-side)
4510  const mag_type val_abs = STS::abs (val);
4511  mySum += val_abs * val_abs;
4512  }
4513  }
4514  else {
4515  const LocalOrdinal numRows =
4516  static_cast<LocalOrdinal> (this->getNodeNumRows ());
4517  for (LocalOrdinal r = 0; r < numRows; ++r) {
      // NOTE(review): this dereferences myGraph_, while leftScale,
      // rightScale and scale use staticGraph_ for getRowInfo().
      // replaceColMap treats a null myGraph_ as "matrix has a const
      // graph"; confirm this branch cannot be reached in that case, or
      // switch to staticGraph_.
4518  const RowInfo rowInfo = myGraph_->getRowInfo (r);
4519  const size_type numEntries =
4520  static_cast<size_type> (rowInfo.numEntries);
4521  ArrayView<const impl_scalar_type> A_r =
4522  this->getView (rowInfo).view (0, numEntries);
4523  for (size_type k = 0; k < numEntries; ++k) {
4524  const impl_scalar_type val = A_r[k];
4525  const mag_type val_abs = STS::abs (val);
4526  mySum += val_abs * val_abs;
4527  }
4528  }
4529  }
4530  }
      // Combine the per-process partial sums; the square root is taken
      // only after the global sum.
4531  mag_type totalSum = STM::zero ();
4532  reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
4533  mySum, outArg (totalSum));
4534  frobNorm = STM::sqrt (totalSum);
4535  }
4536  if (isFillComplete ()) {
4537  // Only cache the result if the matrix is fill complete.
4538  // Otherwise, the values might still change. resumeFill clears
4539  // the cache.
4540  frobNorm_ = frobNorm;
4541  }
4542  return frobNorm;
4543  }
4544 
// Replace the matrix's column Map by forwarding to the graph's
// replaceColMap().
//
// Throws std::runtime_error if myGraph_ is null, i.e. (per the message
// below) when the matrix has a const graph that it may not modify.
4545  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4546  void
4548  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4549  {
4550  const char tfecfFuncName[] = "replaceColMap: ";
4551  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
4552  // Then replacing the column Map might mean that we need to
4553  // reindex the column indices.
4554  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4555  myGraph_.is_null (), std::runtime_error,
4556  "This method does not work if the matrix has a const graph. The whole "
4557  "idea of a const graph is that you are not allowed to change it, but "
4558  "this method necessarily must modify the graph, since the graph owns "
4559  "the matrix's column Map.");
4560  myGraph_->replaceColMap (newColMap);
4561  }
4562 
// reindexColumns (see tfecfFuncName below): reindex the graph's columns
// against a new column Map and Import, then optionally sort each row's
// column indices together with the matching values.
//
// NOTE(review): the first declarator lines are absent from this extract;
// the body uses a pointer-like parameter named `graph` that is not
// declared in the visible text -- verify against the original file.
//
// If `graph` is NULL the matrix's own graph (myGraph_) is used;
// std::invalid_argument is thrown if both are null.
4563  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4564  void
4567  const Teuchos::RCP<const map_type>& newColMap,
4568  const Teuchos::RCP<const import_type>& newImport,
4569  const bool sortEachRow)
4570  {
4571  const char tfecfFuncName[] = "reindexColumns: ";
4572  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4573  graph == NULL && myGraph_.is_null (), std::invalid_argument,
4574  "The input graph is NULL, but the matrix does not own its graph.");
4575 
4576  crs_graph_type& theGraph = (graph == NULL) ? *myGraph_ : *graph;
4577  const bool sortGraph = false; // we'll sort graph & matrix together below
4578  theGraph.reindexColumns (newColMap, newImport, sortGraph);
      // Sorting happens here rather than in the graph so that the values
      // are permuted together with their column indices.
4579  if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
4580  const LocalOrdinal lclNumRows =
4581  static_cast<LocalOrdinal> (theGraph.getNodeNumRows ());
4582  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
4583  const RowInfo rowInfo = theGraph.getRowInfo (row);
4584  auto lclColInds = theGraph.getLocalKokkosRowViewNonConst (rowInfo);
4585  auto vals = this->getRowViewNonConst (rowInfo);
4586  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least for
4587  // lclColInds, if not also for values.
4588  sort2 (lclColInds.data (),
4589  lclColInds.data () + rowInfo.numEntries,
4590  vals.data ());
4591  }
      // Record on the graph that its indices are now sorted.
4592  theGraph.indicesAreSorted_ = true;
4593  }
4594  }
4595 
// Replace the matrix's domain Map and Import object by forwarding to the
// graph's replaceDomainMapAndImporter().
//
// Throws std::runtime_error if myGraph_ is null, i.e. (per the message
// below) when the matrix has a const graph that it may not modify.
4596  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4597  void
4599  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4600  Teuchos::RCP<const import_type>& newImporter)
4601  {
4602  const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
4603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4604  myGraph_.is_null (), std::runtime_error,
4605  "This method does not work if the matrix has a const graph. The whole "
4606  "idea of a const graph is that you are not allowed to change it, but this"
4607  " method necessarily must modify the graph, since the graph owns the "
4608  "matrix's domain Map and Import objects.");
4609  myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
4610  }
4611 
// Append (column index, value) pairs for global row `globalRow` to the
// nonlocals_ table.
//
// New entries are appended after any already stored for that row; no
// merging or deduplication is done here.
//
// NOTE(review): the loop reads values[k] for every k < indices.size()
// without checking that values is at least as long as indices -- confirm
// callers guarantee matching sizes.
4612  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4613  void
4615  insertNonownedGlobalValues (const GlobalOrdinal globalRow,
4616  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4617  const Teuchos::ArrayView<const Scalar>& values)
4618  {
4619  using Teuchos::Array;
4620  typedef GlobalOrdinal GO;
4621  typedef typename Array<GO>::size_type size_type;
4622 
4623  const size_type numToInsert = indices.size ();
4624  // Add the new data to the list of nonlocals.
4625  // This creates the arrays if they don't exist yet.
4626  std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4627  Array<GO>& curRowInds = curRow.first;
4628  Array<Scalar>& curRowVals = curRow.second;
      // Reserve once up front so the push_back loop below does not have to
      // grow the arrays repeatedly.
4629  const size_type newCapacity = curRowInds.size () + numToInsert;
4630  curRowInds.reserve (newCapacity);
4631  curRowVals.reserve (newCapacity);
4632  for (size_type k = 0; k < numToInsert; ++k) {
4633  curRowInds.push_back (indices[k]);
4634  curRowVals.push_back (values[k]);
4635  }
4636  }
4637 
// Move the entries buffered in nonlocals_ into *this.
//
// Requires isFillActive (); throws std::runtime_error otherwise.
// Outline (details are in the numbered comments in the body):
//   - all-reduce to find out whether any process has buffered rows,
//     and return early if none does;
//   - sort and merge each buffered row in place, and build a Map
//     over the buffered rows;
//   - copy the buffered rows into a temporary matrix on that Map;
//   - Export the temporary matrix into *this with Tpetra::ADD,
//     either directly (row Map is one to one) or through an
//     intermediate matrix with a one-to-one row Map followed by an
//     Import;
//   - clear nonlocals_, then all-reduce the Export's
//     isLocallyComplete () result and throw std::runtime_error if
//     any process reported an incomplete Export.
// NOTE(review): the line that names this method is missing from this
// listing; "globalAssemble" is taken from tfecfFuncName below.
4638  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4639  void
4642  {
4643  using ::Tpetra::Details::ProfilingRegion;
4644  using Teuchos::Comm;
4645  using Teuchos::outArg;
4646  using Teuchos::RCP;
4647  using Teuchos::rcp;
4648  using Teuchos::REDUCE_MAX;
4649  using Teuchos::REDUCE_MIN;
4650  using Teuchos::reduceAll;
4652  //typedef LocalOrdinal LO;
4653  typedef GlobalOrdinal GO;
4654  typedef typename Teuchos::Array<GO>::size_type size_type;
4655  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
4656  ProfilingRegion regionGlobalAssemble ("Tpetra::CrsMatrix::globalAssemble");
4657 
4658  RCP<const Comm<int> > comm = getComm ();
4659 
4660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4661  (! isFillActive (), std::runtime_error, "Fill must be active before "
4662  "you may call this method.");
4663 
4664  const size_t myNumNonlocalRows = nonlocals_.size ();
4665 
4666  // If no processes have nonlocal rows, then we don't have to do
4667  // anything. Checking this is probably cheaper than constructing
4668  // the Map of nonlocal rows (see below) and noticing that it has
4669  // zero global entries.
4670  {
4671  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4672  int someoneHasNonlocalRows = 0;
4673  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4674  outArg (someoneHasNonlocalRows));
4675  if (someoneHasNonlocalRows == 0) {
4676  return; // no process has nonlocal rows, so nothing to do
4677  }
4678  }
4679 
4680  // 1. Create a list of the "nonlocal" rows on each process. This
4681  // requires iterating over nonlocals_, so while we do this,
4682  // deduplicate the entries and get a count for each nonlocal
4683  // row on this process.
4684  // 2. Construct a new row Map corresponding to those rows. This
4685  // Map is likely overlapping. We know that the Map is not
4686  // empty on all processes, because the above all-reduce and
4687  // return exclude that case.
4688 
4689  RCP<const map_type> nonlocalRowMap;
4690  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
4691  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4692  {
4693  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4694  size_type curPos = 0;
4695  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4696  ++mapIter, ++curPos) {
4697  myNonlocalGblRows[curPos] = mapIter->first;
4698  // Get the values and column indices by reference, since we
4699  // intend to change them in place (that's what "erase" does).
4700  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4701  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4702 
4703  // Sort both arrays jointly, using the column indices as keys,
4704  // then merge them jointly. "Merge" here adds values
4705  // corresponding to the same column indices. The first 2 args
4706  // of merge2 are output arguments that work just like the
4707  // return value of std::unique.
4708  sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4709  typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4710  typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4711  merge2 (gblCols_newEnd, vals_newEnd,
4712  gblCols.begin (), gblCols.end (),
4713  vals.begin (), vals.end ());
4714  gblCols.erase (gblCols_newEnd, gblCols.end ());
4715  vals.erase (vals_newEnd, vals.end ());
4716  numEntPerNonlocalRow[curPos] = gblCols.size ();
4717  }
4718 
4719  // Currently, Map requires that its indexBase be the global min
4720  // of all its global indices. Map won't compute this for us, so
4721  // we must do it. If our process has no nonlocal rows, set the
4722  // "min" to the max possible GO value. This ensures that if
4723  // some process has at least one nonlocal row, then it will pick
4724  // that up as the min. We know that at least one process has a
4725  // nonlocal row, since the all-reduce and return at the top of
4726  // this method excluded that case.
4727  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4728  {
4729  auto iter = std::min_element (myNonlocalGblRows.begin (),
4730  myNonlocalGblRows.end ());
4731  if (iter != myNonlocalGblRows.end ()) {
4732  myMinNonlocalGblRow = *iter;
4733  }
4734  }
4735  GO gblMinNonlocalGblRow = 0;
4736  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4737  outArg (gblMinNonlocalGblRow));
4738  const GO indexBase = gblMinNonlocalGblRow;
4739  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4740  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4741  }
4742 
4743  // 3. Use the values and column indices for each nonlocal row, as
4744  // stored in nonlocals_, to construct a CrsMatrix corresponding
4745  // to nonlocal rows. We may use StaticProfile, since we have
4746  // exact counts of the number of entries in each nonlocal row.
4747 
4748  RCP<crs_matrix_type> nonlocalMatrix =
4749  rcp (new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow (),
4750  StaticProfile));
4751  {
      // NOTE(review): curPos is incremented in this loop but is only
      // referenced by the commented-out line below.
4752  size_type curPos = 0;
4753  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4754  ++mapIter, ++curPos) {
4755  const GO gblRow = mapIter->first;
4756  // Get values & column indices by ref, just to avoid copy.
4757  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4758  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4759  //const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
4760  nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4761  }
4762  }
4763  // There's no need to fill-complete the nonlocals matrix.
4764  // We just use it as a temporary container for the Export.
4765 
4766  // 4. If the original row Map is one to one, then we can Export
4767  // directly from nonlocalMatrix into this. Otherwise, we have
4768  // to create a temporary matrix with a one-to-one row Map,
4769  // Export into that, then Import from the temporary matrix into
4770  // *this.
4771 
4772  auto origRowMap = this->getRowMap ();
4773  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4774 
      // Set to 0 below if the Export reports that it is not locally
      // complete; all-reduced at the end of this method.
4775  int isLocallyComplete = 1; // true by default
4776 
4777  if (origRowMapIsOneToOne) {
4778  export_type exportToOrig (nonlocalRowMap, origRowMap);
4779  if (! exportToOrig.isLocallyComplete ()) {
4780  isLocallyComplete = 0;
4781  }
4782  this->doExport (*nonlocalMatrix, exportToOrig, Tpetra::ADD);
4783  // We're done at this point!
4784  }
4785  else {
4786  // If you ask a Map whether it is one to one, it does some
4787  // communication and stashes intermediate results for later use
4788  // by createOneToOne. Thus, calling createOneToOne doesn't cost
4789  // much more than the original cost of calling isOneToOne.
4790  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
4791  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4792  if (! exportToOneToOne.isLocallyComplete ()) {
4793  isLocallyComplete = 0;
4794  }
4795 
4796  // Create a temporary matrix with the one-to-one row Map.
4797  //
4798  // TODO (mfh 09 Sep 2016, 12 Sep 2016) Estimate # entries in
4799  // each row, to avoid reallocation during the Export operation.
4800  crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4801  // Export from matrix of nonlocals into the temp one-to-one matrix.
4802  oneToOneMatrix.doExport (*nonlocalMatrix, exportToOneToOne, Tpetra::ADD);
4803 
4804  // We don't need the matrix of nonlocals anymore, so get rid of
4805  // it, to keep the memory high-water mark down.
4806  nonlocalMatrix = Teuchos::null;
4807 
4808  // Import from the one-to-one matrix to the original matrix.
4809  import_type importToOrig (oneToOneRowMap, origRowMap);
4810  this->doImport (oneToOneMatrix, importToOrig, Tpetra::ADD);
4811  }
4812 
4813  // It's safe now to clear out nonlocals_, since we've already
4814  // committed side effects to *this. The standard idiom for
4815  // clearing a Container like std::map, is to swap it with an empty
4816  // Container and let the swapped Container fall out of scope.
4817  decltype (nonlocals_) newNonlocals;
4818  std::swap (nonlocals_, newNonlocals);
4819 
4820  // FIXME (mfh 12 Sep 2016) I don't like this all-reduce, and I
4821  // don't like throwing an exception here. A local return value
4822  // would likely be more useful to users. However, if users find
4823  // themselves exercising nonlocal inserts often, then they are
4824  // probably novice users who need the help. See GitHub Issues
4825  // #603 and #601 (esp. the latter) for discussion.
4826 
4827  int isGloballyComplete = 0; // output argument of reduceAll
4828  reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4829  outArg (isGloballyComplete));
4830  TEUCHOS_TEST_FOR_EXCEPTION
4831  (isGloballyComplete != 1, std::runtime_error, "On at least one process, "
4832  "you called insertGlobalValues with a global row index which is not in "
4833  "the matrix's row Map on any process in its communicator.");
4834  }
4835 
// Put the matrix back into the not-fill-complete state.
// If the matrix is not using a static graph, params is forwarded to
// myGraph_->resumeFill ().  In every case clearGlobalConstants () is
// called and fillComplete_ is set to false.
4836  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4837  void
4839  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
4840  {
4841  if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
4842  myGraph_->resumeFill (params);
4843  }
4844  clearGlobalConstants ();
4845  fillComplete_ = false;
4846  }
4847 
// Intentionally empty method body.
// NOTE(review): the line that names this method is missing from this
// listing; from the comments in the body it is presumably
// computeGlobalConstants () -- confirm against the full file.
4848  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4849  void
4852  {
4853  // This method doesn't do anything. The analogous method in
4854  // CrsGraph does actually compute something.
4855  //
4856  // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
4857  // setting it to -1), but computeGlobalConstants() does _not_
4858  // compute the Frobenius norm; this is done on demand in
4859  // getFrobeniusNorm(), and the result is cached there.
4860  }
4861 
// Returns whatever the graph reports: the result of
// getCrsGraphRef ().haveGlobalConstants ().
// NOTE(review): the lines carrying this method's qualified name and
// opening brace are missing from this listing; the name is presumably
// haveGlobalConstants -- confirm against the full file.
4862  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4863  bool
4866  return getCrsGraphRef ().haveGlobalConstants ();
4867  }
4868 
// Invalidate the cached Frobenius norm by setting frobNorm_ to
// -STM::one ().  Nothing else is modified here.
// NOTE(review): the lines carrying this method's qualified name and
// opening brace are missing from this listing; from the comments in
// the body it is presumably clearGlobalConstants () -- confirm.
4869  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4870  void
4873  // We use -1 to indicate that the Frobenius norm needs to be
4874  // recomputed, since the values might change between now and the
4875  // next fillComplete call.
4876  //
4877  // Oddly enough, clearGlobalConstants() clears frobNorm_, but
4878  // computeGlobalConstants() does _not_ compute the Frobenius norm;
4879  // this is done on demand in getFrobeniusNorm(), and the result is
4880  // cached there.
4881  frobNorm_ = -STM::one ();
4882  }
4883 
// fillComplete overload that takes no Maps: picks the domain and
// range Maps and forwards to the three-argument fillComplete.
//   - Static graph that is already fill complete: use the graph's
//     own domain and range Maps.
//   - Otherwise: use the graph's row Map for both.
// Throws std::logic_error if getCrsGraph () returns null.
4884  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4885  void
4887  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
4888  {
4889  const char tfecfFuncName[] = "fillComplete(params): ";
4890 
4891  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4892  (this->getCrsGraph ().is_null (), std::logic_error,
4893  "getCrsGraph() returns null. This should not happen at this point. "
4894  "Please report this bug to the Tpetra developers.");
4895 
4896  const crs_graph_type& graph = this->getCrsGraphRef ();
4897  if (this->isStaticGraph () && graph.isFillComplete ()) {
4898  // If this matrix's graph is fill complete and the user did not
4899  // supply a domain or range Map, use the graph's domain and
4900  // range Maps.
4901  this->fillComplete (graph.getDomainMap (), graph.getRangeMap (), params);
4902  }
4903  else { // assume that user's row Map is the domain and range Map
4904  Teuchos::RCP<const map_type> rangeMap = graph.getRowMap ();
4905  Teuchos::RCP<const map_type> domainMap = rangeMap;
4906  this->fillComplete (domainMap, rangeMap, params);
4907  }
4908  }
4909 
// Finish assembly of the matrix with explicit domain and range Maps.
//
// Throws std::runtime_error unless isFillActive () is true and
// isFillComplete () is false.
//
// Parameters read from params when it is non-null:
//   "No Nonlocal Changes" (bool, default false): if true,
//     globalAssemble () is not called.
//   "sort column map ghost gids" or, failing that,
//     "Sort column Map ghost GIDs" (bool, default true): stored on
//     the owned graph, if there is one.
//   "compute global constants" (bool, default true)
//   "compute local triangular constants" (bool, default true)
//
// Two main paths follow the optional global assembly:
//   - static graph: (debug builds only) check that the given Maps
//     match the graph's, then fillLocalMatrix ();
//   - owned graph: set the Maps on the graph, build the column Map
//     if there is none, make indices local, sort and merge, build
//     Import/Export, fillLocalGraphAndMatrix (), compute the graph's
//     constants, and mark the graph fill complete.
// Finally fillComplete_ is set to true and checkInternalState ()
// is called.
4910  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4911  void
4913  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
4914  const Teuchos::RCP<const map_type>& rangeMap,
4915  const Teuchos::RCP<Teuchos::ParameterList>& params)
4916  {
4917  using ::Tpetra::Details::ProfilingRegion;
4918  using Teuchos::ArrayRCP;
4919  using Teuchos::RCP;
4920  using Teuchos::rcp;
4921  const char tfecfFuncName[] = "fillComplete: ";
4922  ProfilingRegion regionFillComplete ("Tpetra::CrsMatrix::fillComplete");
4923 
4924  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4925  (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4926  "Matrix fill state must be active (isFillActive() "
4927  "must be true) before you may call fillComplete().");
4928  const int numProcs = this->getComm ()->getSize ();
4929 
4930  //
4931  // Read parameters from the input ParameterList.
4932  //
4933 
4934  // If true, the caller promises that no process did nonlocal
4935  // changes since the last call to fillComplete.
4936  bool assertNoNonlocalInserts = false;
4937  // If true, makeColMap sorts remote GIDs (within each remote
4938  // process' group).
4939  bool sortGhosts = true;
4940 
4941  if (! params.is_null ()) {
4942  assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
4943  assertNoNonlocalInserts);
      // The all-lowercase spelling is checked first; the mixed-case
      // spelling is only consulted when the former is absent.
4944  if (params->isParameter ("sort column map ghost gids")) {
4945  sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
4946  }
4947  else if (params->isParameter ("Sort column Map ghost GIDs")) {
4948  sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
4949  }
4950  }
4951  // We also don't need to do global assembly if there is only one
4952  // process in the communicator.
4953  const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4954  // This parameter only matters if this matrix owns its graph.
4955  if (! this->myGraph_.is_null ()) {
4956  this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4957  }
4958 
4959  if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4960  if (this->hasColMap ()) {
4961  // We have a column Map, so use local indices.
4962  this->allocateValues (LocalIndices, GraphNotYetAllocated);
4963  } else {
4964  // We don't have a column Map, so use global indices.
4965  this->allocateValues (GlobalIndices, GraphNotYetAllocated);
4966  }
4967  }
4968  // Global assemble, if we need to. This call only costs a single
4969  // all-reduce if we didn't need global assembly after all.
4970  if (needGlobalAssemble) {
4971  this->globalAssemble ();
4972  }
4973  else {
      // This check only fires on a single-process run.  With more
      // than one process and "No Nonlocal Changes" set, nonlocals_
      // is not inspected here.
4974  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4975  (numProcs == 1 && nonlocals_.size() > 0,
4976  std::runtime_error, "Cannot have nonlocal entries on a serial run. "
4977  "An invalid entry (i.e., with row index not in the row Map) must have "
4978  "been submitted to the CrsMatrix.");
4979  }
4980 
4981  if (this->isStaticGraph ()) {
4982  // FIXME (mfh 14 Nov 2016) In order to fix #843, I enable the
4983  // checks below only in debug mode. It would be nicer to do a
4984  // local check, then propagate the error state in a deferred
4985  // way, whenever communication happens. That would reduce the
4986  // cost of checking, to the point where it may make sense to
4987  // enable it even in release mode.
4988 #ifdef HAVE_TPETRA_DEBUG
4989  // FIXME (mfh 18 Jun 2014) This check for correctness of the
4990  // input Maps incurs a penalty of two all-reduces for the
4991  // otherwise optimal const graph case.
4992  //
4993  // We could turn these (max) 2 all-reduces into (max) 1, by
4994  // fusing them. We could do this by adding a "locallySameAs"
4995  // method to Map, which would return one of four states:
4996  //
4997  // a. Certainly globally the same
4998  // b. Certainly globally not the same
4999  // c. Locally the same
5000  // d. Locally not the same
5001  //
5002  // The first two states don't require further communication.
5003  // The latter two states require an all-reduce to communicate
5004  // globally, but we only need one all-reduce, since we only need
5005  // to check whether at least one of the Maps is wrong.
5006  const bool domainMapsMatch =
5007  this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
5008  const bool rangeMapsMatch =
5009  this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
5010 
5011  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5012  (! domainMapsMatch, std::runtime_error,
5013  "The CrsMatrix's domain Map does not match the graph's domain Map. "
5014  "The graph cannot be changed because it was given to the CrsMatrix "
5015  "constructor as const. You can fix this by passing in the graph's "
5016  "domain Map and range Map to the matrix's fillComplete call.");
5017 
5018  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5019  (! rangeMapsMatch, std::runtime_error,
5020  "The CrsMatrix's range Map does not match the graph's range Map. "
5021  "The graph cannot be changed because it was given to the CrsMatrix "
5022  "constructor as const. You can fix this by passing in the graph's "
5023  "domain Map and range Map to the matrix's fillComplete call.");
5024 #endif // HAVE_TPETRA_DEBUG
5025 
5026  // The matrix does _not_ own the graph, and the graph's
5027  // structure is already fixed, so just fill the local matrix.
5028  this->fillLocalMatrix (params);
5029  }
5030  else {
5031  // Set the graph's domain and range Maps. This will clear the
5032  // Import if the domain Map has changed (is a different
5033  // pointer), and the Export if the range Map has changed (is a
5034  // different pointer).
5035  this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
5036 
5037  // Make the graph's column Map, if necessary.
5038  Teuchos::Array<int> remotePIDs (0);
5039  const bool mustBuildColMap = ! this->hasColMap ();
5040  if (mustBuildColMap) {
5041  this->myGraph_->makeColMap (remotePIDs);
5042  }
5043 
5044  // Make indices local, if necessary. The method won't do
5045  // anything if the graph is already locally indexed.
5046  const std::pair<size_t, std::string> makeIndicesLocalResult =
5047  this->myGraph_->makeIndicesLocal ();
5048  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
5049  // the error state to makeImportExport or
5050  // computeGlobalConstants, which may do all-reduces and thus may
5051  // have the opportunity to communicate that error state.
5052  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5053  (makeIndicesLocalResult.first != 0, std::runtime_error,
5054  makeIndicesLocalResult.second);
5055 
5056  const bool sorted = this->myGraph_->isSorted ();
5057  const bool merged = this->myGraph_->isMerged ();
5058  this->sortAndMergeIndicesAndValues (sorted, merged);
5059 
5060  // Make Import and Export objects, if they haven't been made
5061  // already. If we made a column Map above, reuse information
5062  // from that process to avoid communication in the Import setup.
5063  this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
5064 
5065  // The matrix _does_ own the graph, so fill the local graph at
5066  // the same time as the local matrix.
5067  this->fillLocalGraphAndMatrix (params);
5068 
      // Both options default to true when params is null.
5069  const bool callGraphComputeGlobalConstants = params.get () == nullptr ||
5070  params->get ("compute global constants", true);
5071  const bool computeLocalTriangularConstants = params.get () == nullptr ||
5072  params->get ("compute local triangular constants", true);
5073  if (callGraphComputeGlobalConstants) {
5074  this->myGraph_->computeGlobalConstants (computeLocalTriangularConstants);
5075  }
5076  else {
5077  this->myGraph_->computeLocalConstants (computeLocalTriangularConstants);
5078  }
5079  this->myGraph_->fillComplete_ = true;
5080  this->myGraph_->checkInternalState ();
5081  }
5082 
      // Same parameter name as the graph-level option read above; this
      // one gates the matrix's own computeGlobalConstants () call.
5083  const bool callComputeGlobalConstants = params.get () == nullptr ||
5084  params->get ("compute global constants", true);
5085  if (callComputeGlobalConstants) {
5086  this->computeGlobalConstants ();
5087  }
5088 
5089  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
5090 
5091  this->fillComplete_ = true; // Now we're fill complete!
5092  this->checkInternalState ();
5093  }
5094 
5095  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5096  void
5098  expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
5099  const Teuchos::RCP<const map_type> & rangeMap,
5100  const Teuchos::RCP<const import_type>& importer,
5101  const Teuchos::RCP<const export_type>& exporter,
5102  const Teuchos::RCP<Teuchos::ParameterList> &params)
5103  {
5104 #ifdef HAVE_TPETRA_MMM_TIMINGS
5105  std::string label;
5106  if(!params.is_null())
5107  label = params->get("Timer Label",label);
5108  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
5109  using Teuchos::TimeMonitor;
5110 
5111  Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-all")));
5112 #endif
5113 
5114  const char tfecfFuncName[] = "expertStaticFillComplete: ";
5115  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
5116  std::runtime_error, "Matrix fill state must be active (isFillActive() "
5117  "must be true) before calling fillComplete().");
5118  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5119  myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
5120 
5121  {
5122 #ifdef HAVE_TPETRA_MMM_TIMINGS
5123  Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string("eSFC-M-Graph")));
5124 #endif
5125  // We will presume globalAssemble is not needed, so we do the ESFC on the graph
5126  myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
5127  }
5128 
5129  const bool callComputeGlobalConstants = params.get () == nullptr ||
5130  params->get ("compute global constants", true);
5131  if (callComputeGlobalConstants) {
5132  this->computeGlobalConstants ();
5133  }
5134 
5135  {
5136 #ifdef HAVE_TPETRA_MMM_TIMINGS
5137  TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string("eSFC-M-fLGAM")));
5138 #endif
5139  // Fill the local graph and matrix
5140  fillLocalGraphAndMatrix (params);
5141  }
5142  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
5143 
5144  // Now we're fill complete!
5145  fillComplete_ = true;
5146 
5147  // Sanity checks at the end.
5148 #ifdef HAVE_TPETRA_DEBUG
5149  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
5150  ": We're at the end of fillComplete(), but isFillActive() is true. "
5151  "Please report this bug to the Tpetra developers.");
5152  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
5153  ": We're at the end of fillComplete(), but isFillActive() is true. "
5154  "Please report this bug to the Tpetra developers.");
5155 #endif // HAVE_TPETRA_DEBUG
5156  {
5157 #ifdef HAVE_TPETRA_MMM_TIMINGS
5158  Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS")));
5159 #endif
5160 
5161  checkInternalState();
5162  }
5163  }
5164 
// Collapse runs of equal column indices in one row, summing the
// values that belong to each run.
//
// Each entry is compared only with the most recently kept entry, so
// equal indices are merged only when they are adjacent.  The merged
// count is written to graph.k_numRowEntries_(rowInfo.localRow).
// Slots past the merged count are not cleared.
//
// Returns the number of duplicates removed, that is,
// rowInfo.numEntries minus the merged count.
// NOTE(review): the lines carrying this method's qualified name and
// first parameter are missing from this listing; the name is taken
// from tfecfFuncName below, and the first parameter is presumably
// the graph object used as `graph` in the body -- confirm.
5165  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5166  size_t
5169  const RowInfo& rowInfo)
5170  {
5171 #ifdef HAVE_TPETRA_DEBUG
5172  const char tfecfFuncName[] = "mergeRowIndicesAndValues: ";
5173 #endif // HAVE_TPETRA_DEBUG
5174 
5175  auto rowValues = this->getRowViewNonConst (rowInfo);
5176  typedef typename std::decay<decltype (rowValues[0]) >::type value_type;
5177  value_type* rowValueIter = rowValues.data ();
5178  auto inds_view = graph.getLocalKokkosRowViewNonConst (rowInfo);
5179 
5180  // beg,end define a half-exclusive interval over which to iterate.
5181  LocalOrdinal* beg = inds_view.data ();
5182  LocalOrdinal* end = inds_view.data () + rowInfo.numEntries;
5183 
5184 #ifdef HAVE_TPETRA_DEBUG
5185  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5186  (rowInfo.allocSize != static_cast<size_t> (inds_view.extent (0)) ||
5187  rowInfo.allocSize != static_cast<size_t> (rowValues.extent (0)),
5188  std::runtime_error, "rowInfo.allocSize = " << rowInfo.allocSize
5189  << " != inds_view.extent(0) = " << inds_view.extent (0)
5190  << " || rowInfo.allocSize = " << rowInfo.allocSize
5191  << " != rowValues.extent(0) = " << rowValues.extent (0) << ".");
5192 #endif // HAVE_TPETRA_DEBUG
5193 
      // newend / vend point at the last kept index / value;
      // cur / vcur walk the remaining entries of the row.
5194  LocalOrdinal* newend = beg;
5195  if (beg != end) {
5196  LocalOrdinal* cur = beg + 1;
5197  value_type* vcur = rowValueIter + 1;
5198  value_type* vend = rowValueIter;
      // NOTE(review): redundant; cur was already initialized to
      // beg + 1 a few lines above.
5199  cur = beg+1;
5200  while (cur != end) {
5201  if (*cur != *newend) {
5202  // new entry; save it
5203  ++newend;
5204  ++vend;
5205  (*newend) = (*cur);
5206  (*vend) = (*vcur);
5207  }
5208  else {
5209  // old entry; merge it
5210  //(*vend) = f (*vend, *vcur);
5211  (*vend) += *vcur;
5212  }
5213  ++cur;
5214  ++vcur;
5215  }
5216  ++newend; // one past the last entry, per typical [beg,end) semantics
5217  }
5218  const size_t mergedEntries = newend - beg;
5219  graph.k_numRowEntries_(rowInfo.localRow) = mergedEntries;
5220  const size_t numDups = rowInfo.numEntries - mergedEntries;
5221  return numDups;
5222  }
5223 
// Sort and/or merge every local row of the owned graph together with
// the matching values.
//
// sorted / merged describe the current state: a row is sorted only
// when sorted is false, and merged only when merged is false.  If
// both are true this method does nothing.
//
// When work is needed, throws std::runtime_error for a static graph,
// and std::logic_error if myGraph_ is null or isStorageOptimized ()
// is true.  Afterwards graph.indicesAreSorted_ and/or
// graph.noRedundancies_ are set to true to match what was done.
5224  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5225  void
5227  sortAndMergeIndicesAndValues (const bool sorted, const bool merged)
5228  {
5229  using ::Tpetra::Details::ProfilingRegion;
5230  typedef LocalOrdinal LO;
5231  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5232  host_execution_space;
5233  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5234  //typedef Kokkos::RangePolicy<Kokkos::Serial, LO> range_type;
5235  const char tfecfFuncName[] = "sortAndMergeIndicesAndValues: ";
5236  ProfilingRegion regionSAM ("Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
5237 
5238  if (! sorted || ! merged) {
5239  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5240  (this->isStaticGraph (), std::runtime_error, "Cannot sort or merge with "
5241  "\"static\" (const) graph, since the matrix does not own the graph.");
5242  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5243  (this->myGraph_.is_null (), std::logic_error, "myGraph_ is null, but "
5244  "this matrix claims ! isStaticGraph(). "
5245  "Please report this bug to the Tpetra developers.");
5246  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5247  (this->isStorageOptimized (), std::logic_error, "It is invalid to call "
5248  "this method if the graph's storage has already been optimized. "
5249  "Please report this bug to the Tpetra developers.");
5250 
5251  crs_graph_type& graph = * (this->myGraph_);
5252  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
      // The reduction sums the per-row duplicate counts into
      // totalNumDups; that total is not read again in this method.
5253  size_t totalNumDups = 0;
5254  // FIXME (mfh 10 May 2017) This may assume CUDA UVM.
5255  Kokkos::parallel_reduce (range_type (0, lclNumRows),
5256  [this, &graph, sorted, merged] (const LO& lclRow, size_t& numDups) {
5257  const RowInfo rowInfo = graph.getRowInfo (lclRow);
5258  if (! sorted) {
5259  auto lclColInds = graph.getLocalKokkosRowViewNonConst (rowInfo);
5260  auto vals = this->getRowViewNonConst (rowInfo);
5261  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least
5262  // for lclColInds, if not also for values.
5263  sort2 (lclColInds.data (),
5264  lclColInds.data () + rowInfo.numEntries,
5265  vals.data ());
5266  }
      // Merging runs after the optional sort of the same row above.
5267  if (! merged) {
5268  numDups += this->mergeRowIndicesAndValues (graph, rowInfo);
5269  }
5270  }, totalNumDups);
5271  if (! sorted) {
5272  graph.indicesAreSorted_ = true; // we just sorted every row
5273  }
5274  if (! merged) {
5275  graph.noRedundancies_ = true; // we just merged every row
5276  }
5277  }
5278  }
5279 
5280  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5281  void
5285  Scalar alpha,
5286  Scalar beta) const
5287  {
5289  using Teuchos::RCP;
5290  using Teuchos::rcp;
5291  using Teuchos::rcp_const_cast;
5292  using Teuchos::rcpFromRef;
5293  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5294  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
5295 
5296  // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
5297  // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
5298  // failing only for the Kokkos refactor version of Tpetra. It's a
5299  // good idea regardless to have the bypass.
5300  if (alpha == ZERO) {
5301  if (beta == ZERO) {
5302  Y_in.putScalar (ZERO);
5303  } else if (beta != ONE) {
5304  Y_in.scale (beta);
5305  }
5306  return;
5307  }
5308 
5309  // It's possible that X is a view of Y or vice versa. We don't
5310  // allow this (apply() requires that X and Y not alias one
5311  // another), but it's helpful to detect and work around this case.
5312  // We don't try to detect the more subtle cases (e.g., one is a
5313  // subview of the other, but their initial pointers differ). We
5314  // only need to do this if this matrix's Import is trivial;
5315  // otherwise, we don't actually apply the operator from X into Y.
5316 
5317  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5318  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5319 
5320  // If beta == 0, then the output MV will be overwritten; none of
5321  // its entries should be read. (Sparse BLAS semantics say that we
5322  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
5323  // This matters if we need to do an Export operation; see below.
5324  const bool Y_is_overwritten = (beta == ZERO);
5325 
5326  // We treat the case of a replicated MV output specially.
5327  const bool Y_is_replicated =
5328  (! Y_in.isDistributed () && this->getComm ()->getSize () != 1);
5329 
5330  // This is part of the special case for replicated MV output.
5331  // We'll let each process do its thing, but do an all-reduce at
5332  // the end to sum up the results. Setting beta=0 on all processes
5333  // but Proc 0 makes the math work out for the all-reduce. (This
5334  // assumes that the replicated data is correctly replicated, so
5335  // that the data are the same on all processes.)
5336  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5337  beta = ZERO;
5338  }
5339 
5340  // Temporary MV for Import operation. After the block of code
5341  // below, this will be an (Imported if necessary) column Map MV
5342  // ready to give to localMultiply().
5343  RCP<const MV> X_colMap;
5344  if (importer.is_null ()) {
5345  if (! X_in.isConstantStride ()) {
5346  // Not all sparse mat-vec kernels can handle an input MV with
5347  // nonconstant stride correctly, so we have to copy it in that
5348  // case into a constant stride MV. To make a constant stride
5349  // copy of X_in, we force creation of the column (== domain)
5350  // Map MV (if it hasn't already been created, else fetch the
5351  // cached copy). This avoids creating a new MV each time.
5352  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
5353  Tpetra::deep_copy (*X_colMapNonConst, X_in);
5354  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5355  }
5356  else {
5357  // The domain and column Maps are the same, so do the local
5358  // multiply using the domain Map input MV X_in.
5359  X_colMap = rcpFromRef (X_in);
5360  }
5361  }
5362  else { // need to Import source (multi)vector
5363  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply: Import");
5364 
5365  // We're doing an Import anyway, which will copy the relevant
5366  // elements of the domain Map MV X_in into a separate column Map
5367  // MV. Thus, we don't have to worry whether X_in is constant
5368  // stride.
5369  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
5370 
5371  // Import from the domain Map MV to the column Map MV.
5372  X_colMapNonConst->doImport (X_in, *importer, INSERT);
5373  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5374  }
5375 
5376  // Temporary MV for doExport (if needed), or for copying a
5377  // nonconstant stride output MV into a constant stride MV. This
5378  // is null if we don't need the temporary MV, that is, if the
5379  // Export is trivial (null).
5380  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
5381 
5382  // If we have a nontrivial Export object, we must perform an
5383  // Export. In that case, the local multiply result will go into
5384  // the row Map multivector. We don't have to make a
5385  // constant-stride version of Y_in in this case, because we had to
5386  // make a constant stride Y_rowMap MV and do an Export anyway.
5387  if (! exporter.is_null ()) {
5388  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
5389  {
5390  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply: Export");
5391 
5392  // If we're overwriting the output MV Y_in completely (beta ==
5393  // 0), then make sure that it is filled with zeros before we
5394  // do the Export. Otherwise, the ADD combine mode will use
5395  // data in Y_in, which is supposed to be zero.
5396  if (Y_is_overwritten) {
5397  Y_in.putScalar (ZERO);
5398  }
5399  else {
5400  // Scale output MV by beta, so that doExport sums in the
5401  // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
5402  Y_in.scale (beta);
5403  }
5404  // Do the Export operation.
5405  Y_in.doExport (*Y_rowMap, *exporter, ADD);
5406  }
5407  }
5408  else { // Don't do an Export: row Map and range Map are the same.
5409  //
5410  // If Y_in does not have constant stride, or if the column Map
5411  // MV aliases Y_in, then we can't let the kernel write directly
5412  // to Y_in. Instead, we have to use the cached row (== range)
5413  // Map MV as temporary storage.
5414  //
5415  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5416  // the user passed in the same MultiVector for both X and Y. It
5417  // won't detect whether one MultiVector views the other. We
5418  // should also check the MultiVectors' raw data pointers.
5419  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
5420  // Force creating the MV if it hasn't been created already.
5421  // This will reuse a previously created cached MV.
5422  Y_rowMap = getRowMapMultiVector (Y_in, true);
5423 
5424  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
5425  // since we're overwriting it anyway.
5426  if (beta != ZERO) {
5427  Tpetra::deep_copy (*Y_rowMap, Y_in);
5428  }
5429  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
5430  Tpetra::deep_copy (Y_in, *Y_rowMap);
5431  }
5432  else {
5433  this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
5434  }
5435  }
5436 
5437  // If the range Map is a locally replicated Map, sum up
5438  // contributions from each process. We set beta = 0 on all
5439  // processes but Proc 0 initially, so this will handle the scaling
5440  // factor beta correctly.
5441  if (Y_is_replicated) {
5442  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply: Reduce Y");
5443  Y_in.reduce ();
5444  }
5445  }
5446 
// Transpose-mode apply path (see the "apply (transpose)" profiling labels
// below).  Applies the matrix in the mode given by `mode` to X_in and
// combines the result into Y_in using the scaling factors alpha and beta;
// presumably Y_in = beta*Y_in + alpha*op(A)*X_in, with the local product
// delegated to localApply().
//
// NOTE(review): the listing lines that carry this function's name and its
// first parameters (X_in and Y_in, as used in the body) are missing from
// this listing, as is listing line 5456 (presumably the using-declaration
// for ProfilingRegion) -- confirm against the real header.
//
// Outline of the body:
//   1. alpha == 0 shortcut: Y_in is zeroed (beta == 0) or scaled by beta.
//   2. For a non-distributed Y_in, beta is zeroed on every rank > 0 so the
//      final reduce() accounts for beta only once.
//   3. The cached importMV_ / exportMV_ temporaries are (re)allocated when
//      missing or when their number of vectors differs from X_in's.
//   4. Non-null exporter: X_in is brought into exportMV_ via doImport with
//      the exporter, and the multiply reads from exportMV_.
//   5. Non-null importer: the multiply writes into importMV_, which is then
//      summed into Y_in via doExport with the importer (ADD).
//   6. Otherwise the multiply writes into Y_in directly, or through a deep
//      copy when Y_in lacks constant stride or is the same object as X.
5447  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5448  void
5452  const Teuchos::ETransp mode,
5453  Scalar alpha,
5454  Scalar beta) const
5455  {
5457  using Teuchos::null;
5458  using Teuchos::RCP;
5459  using Teuchos::rcp;
5460  using Teuchos::rcp_const_cast;
5461  using Teuchos::rcpFromRef;
5462  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5463 
5464  // Take shortcuts for alpha == 0.
5465  if (alpha == ZERO) {
5466  // Follow the Sparse BLAS convention by ignoring both the matrix
5467  // and X_in, in this case.
5468  if (beta == ZERO) {
5469  // Follow the Sparse BLAS convention by overwriting any Inf or
5470  // NaN values in Y_in, in this case.
5471  Y_in.putScalar (ZERO);
5472  }
5473  else {
5474  Y_in.scale (beta);
5475  }
5476  return;
5477  }
5478 
5479  const size_t numVectors = X_in.getNumVectors ();
5480 
5481  // We don't allow X_in and Y_in to alias one another. It's hard
5482  // to check this, because advanced users could create views from
5483  // raw pointers. However, if X_in and Y_in reference the same
5484  // object, we will do the user a favor by copying X into new
5485  // storage (with a warning). We only need to do this if we have
5486  // trivial importers; otherwise, we don't actually apply the
5487  // operator from X into Y.
5488  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5489  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5490  // access X indirectly, in case we need to create temporary storage
5491  RCP<const MV> X;
5492 
5493  // some parameters for below
5494  const bool Y_is_replicated = ! Y_in.isDistributed ();
// Y_is_overwritten is captured before beta is possibly zeroed below, so it
// reflects the caller's beta rather than the per-rank adjusted value.
5495  const bool Y_is_overwritten = (beta == ZERO);
5496  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5497  beta = ZERO;
5498  }
5499 
5500  // The kernels do not allow input or output with nonconstant stride.
// NOTE(review): X is only replaced by exportMV_ (further down) when the
// exporter is non-null, yet this copy is gated on the importer being null.
// Confirm that a nonconstant-stride X_in combined with a non-null importer
// and a null exporter is handled correctly by localApply.
5501  if (! X_in.isConstantStride () && importer.is_null ()) {
5502  X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
5503  } else {
5504  X = rcpFromRef (X_in); // Reference to X_in
5505  }
5506 
5507  // Set up temporary multivectors for Import and/or Export.
// A cached temporary is dropped and reallocated whenever its number of
// vectors does not match numVectors.
5508  if (importer != Teuchos::null) {
5509  if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
5510  importMV_ = null;
5511  }
5512  if (importMV_ == null) {
5513  importMV_ = rcp (new MV (this->getColMap (), numVectors));
5514  }
5515  }
5516  if (exporter != Teuchos::null) {
5517  if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
5518  exportMV_ = null;
5519  }
5520  if (exportMV_ == null) {
5521  exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
5522  }
5523  }
5524 
5525  // If we have a non-trivial exporter, we must import elements that
5526  // are permuted or are on other processors.
5527  if (! exporter.is_null ()) {
5528  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply (transpose): Import");
5529  exportMV_->doImport (X_in, *exporter, INSERT);
5530  X = exportMV_; // multiply out of exportMV_
5531  }
5532 
5533  // If we have a non-trivial importer, we must export elements that
5534  // are permuted or belong to other processors. We will compute
5535  // solution into the to-be-exported MV; get a view.
5536  if (importer != Teuchos::null) {
5537  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply (transpose): Export");
5538 
5539  // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
5540  // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
5541  // back and figure out why this helps. importMV_ SHOULD be
5542  // completely overwritten in the localMultiply() call below,
5543  // because beta == ZERO there.
5544  importMV_->putScalar (ZERO);
5545  // Do the local computation.
5546  this->localApply (*X, *importMV_, mode, alpha, ZERO);
// Prepare Y_in so that the ADD-mode export below sums the product into
// either zeros (overwrite case) or the beta-scaled previous contents.
5547  if (Y_is_overwritten) {
5548  Y_in.putScalar (ZERO);
5549  } else {
5550  Y_in.scale (beta);
5551  }
5552  Y_in.doExport (*importMV_, *importer, ADD);
5553  }
5554  // otherwise, multiply into Y
5555  else {
5556  // can't multiply in-situ; can't multiply into non-strided multivector
5557  //
5558  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5559  // the user passed in the same MultiVector for both X and Y. It
5560  // won't detect whether one MultiVector views the other. We
5561  // should also check the MultiVectors' raw data pointers.
5562  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
5563  // Make a deep copy of Y_in, into which to write the multiply result.
5564  MV Y (Y_in, Teuchos::Copy);
5565  this->localApply (*X, Y, mode, alpha, beta);
5566  Tpetra::deep_copy (Y_in, Y);
5567  } else {
5568  this->localApply (*X, Y_in, mode, alpha, beta);
5569  }
5570  }
5571 
5572  // If the range Map is a locally replicated map, sum the
5573  // contributions from each process. (That's why we set beta=0
5574  // above for all processes but Proc 0.)
5575  if (Y_is_replicated) {
5576  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply (transpose): Reduce Y");
5577  Y_in.reduce ();
5578  }
5579  }
5580 
// Local (per-process) apply: opens the "Tpetra::CrsMatrix::localApply"
// profiling region and forwards X, Y, mode, alpha and beta unchanged to
// localMultiply<Scalar, Scalar>.
//
// NOTE(review): the listing lines naming this function and declaring its
// X and Y parameters are missing from this listing, as is listing line
// 5590 (presumably the using-declaration for ProfilingRegion).
5581  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5582  void
5586  const Teuchos::ETransp mode,
5587  const Scalar& alpha,
5588  const Scalar& beta) const
5589  {
5591  ProfilingRegion regionLocalApply ("Tpetra::CrsMatrix::localApply");
5592  this->template localMultiply<Scalar, Scalar> (X, Y, mode, alpha, beta);
5593  }
5594 
// Entry point for applying the matrix to X with result in Y.
//
// Raises std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION) when the
// matrix is not fill complete.  For mode == Teuchos::NO_TRANS the work is
// delegated to applyNonTranspose(); every other mode goes to
// applyTranspose(), after Y has been explicitly zeroed when beta == 0.
//
// NOTE(review): the listing lines naming this function and declaring its
// X and Y parameters are missing from this listing, as is listing line
// 5604 (presumably the using-declaration for ProfilingRegion).
5595  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5596  void
5600  Teuchos::ETransp mode,
5601  Scalar alpha,
5602  Scalar beta) const
5603  {
5605  const char fnName[] = "Tpetra::CrsMatrix::apply";
5606 
5607  TEUCHOS_TEST_FOR_EXCEPTION
5608  (! isFillComplete (), std::runtime_error,
5609  fnName << ": Cannot call apply() until fillComplete() "
5610  "has been called.");
5611 
5612  if (mode == Teuchos::NO_TRANS) {
5613  ProfilingRegion regionNonTranspose (fnName);
5614  this->applyNonTranspose (X, Y, alpha, beta);
5615  }
5616  else {
5617  ProfilingRegion regionTranspose ("Tpetra::CrsMatrix::apply (transpose)");
5618 
5619  // Thyra was implicitly assuming that Y gets set to zero, or is
5620  // overwritten, when beta == 0. This was not the case with transpose in a
5621  // multithreaded environment, where a multiplication with subsequent
5622  // atomic_adds is used, since 0 is effectively not special-cased there.
5623  // Doing the explicit set to zero here catches cases where Y is NaN or Inf.
5624  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5625  if (beta == ZERO) {
5626  Y.putScalar (ZERO);
5627  }
5628  this->applyTranspose (X, Y, mode, alpha, beta);
5629  }
5630  }
5631 
// Convenience overload: forwards to reorderedGaussSeidel() with
// Teuchos::null in the row-index slot (no reordering requested), passing
// B, X, D, dampingFactor, direction and numSweeps through unchanged.
//
// NOTE(review): the listing lines naming this function and declaring its
// B, X and D parameters are missing from this listing.
5632  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5633  void
5638  const Scalar& dampingFactor,
5639  const ESweepDirection direction,
5640  const int numSweeps) const
5641  {
5642  reorderedGaussSeidel (B, X, D, Teuchos::null, dampingFactor, direction, numSweeps);
5643  }
5644 
5645  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5646  void
5651  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
5652  const Scalar& dampingFactor,
5653  const ESweepDirection direction,
5654  const int numSweeps) const
5655  {
5656  using Teuchos::null;
5657  using Teuchos::RCP;
5658  using Teuchos::rcp;
5659  using Teuchos::rcp_const_cast;
5660  using Teuchos::rcpFromRef;
5661  typedef Scalar ST;
5662 
5663  TEUCHOS_TEST_FOR_EXCEPTION(
5664  isFillComplete() == false, std::runtime_error,
5665  "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until "
5666  "fillComplete() has been called.");
5667  TEUCHOS_TEST_FOR_EXCEPTION(
5668  numSweeps < 0,
5669  std::invalid_argument,
5670  "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be , "
5671  "nonnegative but you provided numSweeps = " << numSweeps << " < 0.");
5672 
5673  // Translate from global to local sweep direction.
5674  // While doing this, validate the input.
5675  ESweepDirection localDirection;
5676  if (direction == Forward) {
5677  localDirection = Forward;
5678  }
5679  else if (direction == Backward) {
5680  localDirection = Backward;
5681  }
5682  else if (direction == Symmetric) {
5683  // We'll control local sweep direction manually.
5684  localDirection = Forward;
5685  }
5686  else {
5687  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
5688  "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have "
5689  "any of its valid values: Forward, Backward, or Symmetric.");
5690  }
5691 
5692  if (numSweeps == 0) {
5693  return; // Nothing to do.
5694  }
5695 
5696  // We don't need the Export object because this method assumes
5697  // that the row, domain, and range Maps are the same. We do need
5698  // the Import object, if there is one, though.
5699  RCP<const import_type> importer = this->getGraph()->getImporter();
5700  RCP<const export_type> exporter = this->getGraph()->getExporter();
5701  TEUCHOS_TEST_FOR_EXCEPTION(
5702  ! exporter.is_null (), std::runtime_error,
5703  "Tpetra's gaussSeidel implementation requires that the row, domain, "
5704  "and range Maps be the same. This cannot be the case, because the "
5705  "matrix has a nontrivial Export object.");
5706 
5707  RCP<const map_type> domainMap = this->getDomainMap ();
5708  RCP<const map_type> rangeMap = this->getRangeMap ();
5709  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
5710  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
5711 
5712 #ifdef HAVE_TEUCHOS_DEBUG
5713  {
5714  // The relation 'isSameAs' is transitive. It's also a
5715  // collective, so we don't have to do a "shared" test for
5716  // exception (i.e., a global reduction on the test value).
5717  TEUCHOS_TEST_FOR_EXCEPTION(
5718  ! X.getMap ()->isSameAs (*domainMap),
5719  std::runtime_error,
5720  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5721  "multivector X be in the domain Map of the matrix.");
5722  TEUCHOS_TEST_FOR_EXCEPTION(
5723  ! B.getMap ()->isSameAs (*rangeMap),
5724  std::runtime_error,
5725  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5726  "B be in the range Map of the matrix.");
5727  TEUCHOS_TEST_FOR_EXCEPTION(
5728  ! D.getMap ()->isSameAs (*rowMap),
5729  std::runtime_error,
5730  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5731  "D be in the row Map of the matrix.");
5732  TEUCHOS_TEST_FOR_EXCEPTION(
5733  ! rowMap->isSameAs (*rangeMap),
5734  std::runtime_error,
5735  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
5736  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
5737  TEUCHOS_TEST_FOR_EXCEPTION(
5738  ! domainMap->isSameAs (*rangeMap),
5739  std::runtime_error,
5740  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
5741  "the range Map of the matrix be the same.");
5742  }
5743 #else
5744  // Forestall any compiler warnings for unused variables.
5745  (void) rangeMap;
5746  (void) rowMap;
5747 #endif // HAVE_TEUCHOS_DEBUG
5748 
5749  // If B is not constant stride, copy it into a constant stride
5750  // multivector. We'l handle the right-hand side B first and deal
5751  // with X right before the sweeps, to improve locality of the
5752  // first sweep. (If the problem is small enough, then that will
5753  // hopefully keep more of the entries of X in cache. This
5754  // optimizes for the typical case of a small number of sweeps.)
5755  RCP<const MV> B_in;
5756  if (B.isConstantStride()) {
5757  B_in = rcpFromRef (B);
5758  }
5759  else {
5760  // The range Map and row Map are the same in this case, so we
5761  // can use the (possibly cached) row Map multivector to store a
5762  // constant stride copy of B. We don't have to copy back, since
5763  // Gauss-Seidel won't modify B.
5764  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
5765  deep_copy (*B_in_nonconst, B); // Copy from B into B_in(_nonconst).
5766  B_in = rcp_const_cast<const MV> (B_in_nonconst);
5767 
5769  ! B.isConstantStride (),
5770  std::runtime_error,
5771  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
5772  "requires that X and B both have constant stride. Since B does not "
5773  "have constant stride, we had to make a copy. This is a limitation of "
5774  "the current implementation and not your fault, but we still report it "
5775  "as an efficiency warning for your information.");
5776  }
5777 
5778  // If X is not constant stride, copy it into a constant stride
5779  // multivector. Also, make the column Map multivector X_colMap,
5780  // and its domain Map view X_domainMap. (X actually must be a
5781  // domain Map view of a column Map multivector; exploit this, if X
5782  // has constant stride.)
5783 
5784  RCP<MV> X_domainMap;
5785  RCP<MV> X_colMap;
5786  bool copiedInput = false;
5787 
5788  if (importer.is_null ()) { // Domain and column Maps are the same.
5789  if (X.isConstantStride ()) {
5790  X_domainMap = rcpFromRef (X);
5791  X_colMap = X_domainMap;
5792  copiedInput = false;
5793  }
5794  else {
5795  // Get a temporary column Map multivector, make a domain Map
5796  // view of it, and copy X into the domain Map view. We have
5797  // to copy here because we won't be doing Import operations.
5798  X_colMap = getColumnMapMultiVector (X, true);
5799  X_domainMap = X_colMap; // Domain and column Maps are the same.
5800  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
5801  copiedInput = true;
5803  ! X.isConstantStride (), std::runtime_error,
5804  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
5805  "Gauss-Seidel kernel requires that X and B both have constant "
5806  "stride. Since X does not have constant stride, we had to make a "
5807  "copy. This is a limitation of the current implementation and not "
5808  "your fault, but we still report it as an efficiency warning for "
5809  "your information.");
5810  }
5811  }
5812  else { // We will be doing Import operations in the sweeps.
5813  if (X.isConstantStride ()) {
5814  X_domainMap = rcpFromRef (X);
5815  // This kernel assumes that X is a domain Map view of a column
5816  // Map multivector. We will only check if this is valid if
5817  // the CMake configure Teuchos_ENABLE_DEBUG is ON.
5818  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
5819 
5820  // FIXME (mfh 19 Mar 2013) Do we need to fill the remote
5821  // entries of X_colMap with zeros? Do we need to fill all of
5822  // X_domainMap initially with zeros? Ifpack
5823  // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely
5824  // new MultiVector each time.
5825 
5826  // Do the first Import for the first sweep. This simplifies
5827  // the logic in the sweeps.
5828  X_colMap->doImport (X, *importer, INSERT);
5829  copiedInput = false;
5830  }
5831  else {
5832  // Get a temporary column Map multivector X_colMap, and make a
5833  // domain Map view X_domainMap of it. Instead of copying, we
5834  // do an Import from X into X_domainMap. This saves us a
5835  // copy, since the Import has to copy the data anyway.
5836  X_colMap = getColumnMapMultiVector (X, true);
5837  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
5838  X_colMap->doImport (X, *importer, INSERT);
5839  copiedInput = true;
5841  ! X.isConstantStride (), std::runtime_error,
5842  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
5843  "Gauss-Seidel kernel requires that X and B both have constant stride. "
5844  "Since X does not have constant stride, we had to make a copy. "
5845  "This is a limitation of the current implementation and not your fault, "
5846  "but we still report it as an efficiency warning for your information.");
5847  }
5848  }
5849 
5850  for (int sweep = 0; sweep < numSweeps; ++sweep) {
5851  if (! importer.is_null () && sweep > 0) {
5852  // We already did the first Import for the zeroth sweep.
5853  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5854  }
5855 
5856  // Do local Gauss-Seidel.
5857  if (direction != Symmetric) {
5858  if (rowIndices.is_null ()) {
5859  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5860  dampingFactor,
5861  localDirection);
5862  }
5863  else {
5864  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5865  D, rowIndices,
5866  dampingFactor,
5867  localDirection);
5868  }
5869  }
5870  else { // direction == Symmetric
5871  const bool doImportBetweenDirections = false;
5872  if (rowIndices.is_null ()) {
5873  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5874  dampingFactor,
5875  Forward);
5876  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
5877  // Gauss-Seidel" does _not_ do an Import between the forward
5878  // and backward sweeps. This makes sense, because Aztec
5879  // considers "symmetric Gauss-Seidel" a subdomain solver.
5880  if (doImportBetweenDirections) {
5881  // Communicate again before the Backward sweep.
5882  if (! importer.is_null ()) {
5883  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5884  }
5885  }
5886  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5887  dampingFactor,
5888  Backward);
5889  }
5890  else {
5891  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5892  D, rowIndices,
5893  dampingFactor,
5894  Forward);
5895  if (doImportBetweenDirections) {
5896  // Communicate again before the Backward sweep.
5897  if (! importer.is_null ()) {
5898  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5899  }
5900  }
5901  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5902  D, rowIndices,
5903  dampingFactor,
5904  Backward);
5905  }
5906  }
5907  }
5908 
5909  if (copiedInput) {
5910  deep_copy (X, *X_domainMap); // Copy back from X_domainMap to X.
5911  }
5912  }
5913 
// Convenience overload: forwards to reorderedGaussSeidelCopy() with
// Teuchos::null in the row-index slot (no reordering requested), passing
// X, B, D, dampingFactor, direction, numSweeps and zeroInitialGuess
// through unchanged.
//
// NOTE(review): the listing lines naming this function and declaring its
// X, B and D parameters are missing from this listing.
5914  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5915  void
5920  const Scalar& dampingFactor,
5921  const ESweepDirection direction,
5922  const int numSweeps,
5923  const bool zeroInitialGuess) const
5924  {
5925  reorderedGaussSeidelCopy (X, B, D, Teuchos::null, dampingFactor, direction,
5926  numSweeps, zeroInitialGuess);
5927  }
5928 
5929  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5930  void
5935  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
5936  const Scalar& dampingFactor,
5937  const ESweepDirection direction,
5938  const int numSweeps,
5939  const bool zeroInitialGuess) const
5940  {
5941  using Teuchos::null;
5942  using Teuchos::RCP;
5943  using Teuchos::rcp;
5944  using Teuchos::rcpFromRef;
5945  using Teuchos::rcp_const_cast;
5946  typedef Scalar ST;
5947  const char prefix[] = "Tpetra::CrsMatrix::(reordered)gaussSeidelCopy: ";
5948  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5949 
5950  TEUCHOS_TEST_FOR_EXCEPTION(
5951  ! isFillComplete (), std::runtime_error,
5952  prefix << "The matrix is not fill complete.");
5953  TEUCHOS_TEST_FOR_EXCEPTION(
5954  numSweeps < 0, std::invalid_argument,
5955  prefix << "The number of sweeps must be nonnegative, "
5956  "but you provided numSweeps = " << numSweeps << " < 0.");
5957 
5958  // Translate from global to local sweep direction.
5959  // While doing this, validate the input.
5960  ESweepDirection localDirection;
5961  if (direction == Forward) {
5962  localDirection = Forward;
5963  }
5964  else if (direction == Backward) {
5965  localDirection = Backward;
5966  }
5967  else if (direction == Symmetric) {
5968  // We'll control local sweep direction manually.
5969  localDirection = Forward;
5970  }
5971  else {
5972  TEUCHOS_TEST_FOR_EXCEPTION(
5973  true, std::invalid_argument,
5974  prefix << "The 'direction' enum does not have any of its valid "
5975  "values: Forward, Backward, or Symmetric.");
5976  }
5977 
5978  if (numSweeps == 0) {
5979  return;
5980  }
5981 
5982  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5983  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5984  TEUCHOS_TEST_FOR_EXCEPTION(
5985  ! exporter.is_null (), std::runtime_error,
5986  "This method's implementation currently requires that the matrix's row, "
5987  "domain, and range Maps be the same. This cannot be the case, because "
5988  "the matrix has a nontrivial Export object.");
5989 
5990  RCP<const map_type> domainMap = this->getDomainMap ();
5991  RCP<const map_type> rangeMap = this->getRangeMap ();
5992  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
5993  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
5994 
5995 #ifdef HAVE_TEUCHOS_DEBUG
5996  {
5997  // The relation 'isSameAs' is transitive. It's also a
5998  // collective, so we don't have to do a "shared" test for
5999  // exception (i.e., a global reduction on the test value).
6000  TEUCHOS_TEST_FOR_EXCEPTION(
6001  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
6002  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
6003  "multivector X be in the domain Map of the matrix.");
6004  TEUCHOS_TEST_FOR_EXCEPTION(
6005  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
6006  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
6007  "B be in the range Map of the matrix.");
6008  TEUCHOS_TEST_FOR_EXCEPTION(
6009  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
6010  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
6011  "D be in the row Map of the matrix.");
6012  TEUCHOS_TEST_FOR_EXCEPTION(
6013  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
6014  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
6015  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
6016  TEUCHOS_TEST_FOR_EXCEPTION(
6017  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
6018  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
6019  "the range Map of the matrix be the same.");
6020  }
6021 #else
6022  // Forestall any compiler warnings for unused variables.
6023  (void) rangeMap;
6024  (void) rowMap;
6025 #endif // HAVE_TEUCHOS_DEBUG
6026 
6027  // Fetch a (possibly cached) temporary column Map multivector
6028  // X_colMap, and a domain Map view X_domainMap of it. Both have
6029  // constant stride by construction. We know that the domain Map
6030  // must include the column Map, because our Gauss-Seidel kernel
6031  // requires that the row Map, domain Map, and range Map are all
6032  // the same, and that each process owns all of its own diagonal
6033  // entries of the matrix.
6034 
6035  RCP<MV> X_colMap;
6036  RCP<MV> X_domainMap;
6037  bool copyBackOutput = false;
6038  if (importer.is_null ()) {
6039  if (X.isConstantStride ()) {
6040  X_colMap = rcpFromRef (X);
6041  X_domainMap = rcpFromRef (X);
6042  // Column Map and domain Map are the same, so there are no
6043  // remote entries. Thus, if we are not setting the initial
6044  // guess to zero, we don't have to worry about setting remote
6045  // entries to zero, even though we are not doing an Import in
6046  // this case.
6047  if (zeroInitialGuess) {
6048  X_colMap->putScalar (ZERO);
6049  }
6050  // No need to copy back to X at end.
6051  }
6052  else { // We must copy X into a constant stride multivector.
6053  // Just use the cached column Map multivector for that.
6054  // force=true means fill with zeros, so no need to fill
6055  // remote entries (not in domain Map) with zeros.
6056  X_colMap = getColumnMapMultiVector (X, true);
6057  // X_domainMap is always a domain Map view of the column Map
6058  // multivector. In this case, the domain and column Maps are
6059  // the same, so X_domainMap _is_ X_colMap.
6060  X_domainMap = X_colMap;
6061  if (! zeroInitialGuess) { // Don't copy if zero initial guess
6062  try {
6063  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
6064  } catch (std::exception& e) {
6065  std::ostringstream os;
6066  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
6067  "deep_copy(*X_domainMap, X) threw an exception: "
6068  << e.what () << ".";
6069  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
6070  }
6071  }
6072  copyBackOutput = true; // Don't forget to copy back at end.
6074  ! X.isConstantStride (),
6075  std::runtime_error,
6076  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
6077  "kernel requires that X and B both have constant stride. Since X "
6078  "does not have constant stride, we had to make a copy. This is a "
6079  "limitation of the current implementation and not your fault, but we "
6080  "still report it as an efficiency warning for your information.");
6081  }
6082  }
6083  else { // Column Map and domain Map are _not_ the same.
6084  X_colMap = getColumnMapMultiVector (X);
6085  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
6086 
6087 #ifdef HAVE_TPETRA_DEBUG
6088 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
6089  auto X_colMap_host_view =
6090  X_colMap->template getLocalView<Kokkos::HostSpace> ();
6091  auto X_domainMap_host_view =
6092  X_domainMap->template getLocalView<Kokkos::HostSpace> ();
6093 #else
6094  auto X_colMap_host_view =
6095  X_colMap->getLocalViewHost ();
6096  auto X_domainMap_host_view =
6097  X_domainMap->getLocalViewHost ();
6098 #endif
6099 
6100  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
6101  TEUCHOS_TEST_FOR_EXCEPTION
6102  (X_colMap_host_view.data () != X_domainMap_host_view.data (),
6103  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: Pointer to "
6104  "start of column Map view of X is not equal to pointer to start of "
6105  "(domain Map view of) X. This may mean that Tpetra::MultiVector::"
6106  "offsetViewNonConst is broken. "
6107  "Please report this bug to the Tpetra developers.");
6108  }
6109 
6110  TEUCHOS_TEST_FOR_EXCEPTION(
6111  X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
6112  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
6113  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
6114  "X_colMap has fewer local rows than X_domainMap. "
6115  "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
6116  << ", X_domainMap_host_view.extent(0) = "
6117  << X_domainMap_host_view.extent (0)
6118  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
6119  << ", and X_domainMap->getLocalLength() = "
6120  << X_domainMap->getLocalLength ()
6121  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
6122  "is broken. Please report this bug to the Tpetra developers.");
6123 
6124  TEUCHOS_TEST_FOR_EXCEPTION(
6125  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
6126  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
6127  "X_colMap has a different number of columns than X_domainMap. "
6128  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
6129  << " != X_domainMap->getNumVectors() = "
6130  << X_domainMap->getNumVectors ()
6131  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
6132  "is broken. Please report this bug to the Tpetra developers.");
6133 #endif // HAVE_TPETRA_DEBUG
6134 
6135  if (zeroInitialGuess) {
6136  // No need for an Import, since we're filling with zeros.
6137  X_colMap->putScalar (ZERO);
6138  } else {
6139  // We could just copy X into X_domainMap. However, that
6140  // wastes a copy, because the Import also does a copy (plus
6141  // communication). Since the typical use case for
6142  // Gauss-Seidel is a small number of sweeps (2 is typical), we
6143  // don't want to waste that copy. Thus, we do the Import
6144  // here, and skip the first Import in the first sweep.
6145  // Importing directly from X effects the copy into X_domainMap
6146  // (which is a view of X_colMap).
6147  X_colMap->doImport (X, *importer, INSERT);
6148  }
6149  copyBackOutput = true; // Don't forget to copy back at end.
6150  } // if column and domain Maps are (not) the same
6151 
6152  // The Gauss-Seidel / SOR kernel expects multivectors of constant
6153  // stride. X_colMap is by construction, but B might not be. If
6154  // it's not, we have to make a copy.
6155  RCP<const MV> B_in;
6156  if (B.isConstantStride ()) {
6157  B_in = rcpFromRef (B);
6158  }
6159  else {
6160  // Range Map and row Map are the same in this case, so we can
6161  // use the cached row Map multivector to store a constant stride
6162  // copy of B.
6163  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
6164  try {
6165  deep_copy (*B_in_nonconst, B);
6166  } catch (std::exception& e) {
6167  std::ostringstream os;
6168  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
6169  "deep_copy(*B_in_nonconst, B) threw an exception: "
6170  << e.what () << ".";
6171  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
6172  }
6173  B_in = rcp_const_cast<const MV> (B_in_nonconst);
6174 
6176  ! B.isConstantStride (),
6177  std::runtime_error,
6178  "gaussSeidelCopy: The current implementation requires that B have "
6179  "constant stride. Since B does not have constant stride, we had to "
6180  "copy it into a separate constant-stride multivector. This is a "
6181  "limitation of the current implementation and not your fault, but we "
6182  "still report it as an efficiency warning for your information.");
6183  }
6184 
6185  for (int sweep = 0; sweep < numSweeps; ++sweep) {
6186  if (! importer.is_null () && sweep > 0) {
6187  // We already did the first Import for the zeroth sweep above,
6188  // if it was necessary.
6189  X_colMap->doImport (*X_domainMap, *importer, INSERT);
6190  }
6191 
6192  // Do local Gauss-Seidel.
6193  if (direction != Symmetric) {
6194  if (rowIndices.is_null ()) {
6195  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
6196  dampingFactor,
6197  localDirection);
6198  }
6199  else {
6200  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
6201  D, rowIndices,
6202  dampingFactor,
6203  localDirection);
6204  }
6205  }
6206  else { // direction == Symmetric
6207  if (rowIndices.is_null ()) {
6208  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
6209  dampingFactor,
6210  Forward);
6211  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
6212  // Gauss-Seidel" does _not_ do an Import between the forward
6213  // and backward sweeps. This makes symmetric Gauss-Seidel a
6214  // symmetric preconditioner if the matrix A is symmetric. We
6215  // imitate Aztec's behavior here.
6216  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
6217  dampingFactor,
6218  Backward);
6219  }
6220  else {
6221  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
6222  D, rowIndices,
6223  dampingFactor,
6224  Forward);
6225  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
6226  D, rowIndices,
6227  dampingFactor,
6228  Backward);
6229 
6230  }
6231  }
6232  }
6233 
6234  if (copyBackOutput) {
6235  try {
6236  deep_copy (X , *X_domainMap); // Copy result back into X.
6237  } catch (std::exception& e) {
6238  TEUCHOS_TEST_FOR_EXCEPTION(
6239  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
6240  "threw an exception: " << e.what ());
6241  }
6242  }
6243  }
6244 
6245  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6246  template<class T>
6247  Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
6249  convert () const
6250  {
      // Return a new CrsMatrix whose value type is T, constructed from
      // this matrix's graph (getCrsGraph()) and filled with this
      // matrix's values converted entry by entry.
      //
      // Preconditions enforced below: this matrix must be fill
      // complete (std::runtime_error otherwise) and must have a static
      // graph (std::logic_error otherwise).
      //
      // NOTE(review): listing line 6248 is absent from this view;
      // presumably it held the "CrsMatrix<...>::" class qualifier.
6251  using Teuchos::RCP;
6252  typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> output_matrix_type;
      // Picked up by name by TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
6253  const char tfecfFuncName[] = "convert: ";
6254 
6255  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6256  (! this->isFillComplete (), std::runtime_error, "This matrix (the source "
6257  "of the conversion) is not fill complete. You must first call "
6258  "fillComplete() (possibly with the domain and range Map) without an "
6259  "intervening call to resumeFill(), before you may call this method.");
6260  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6261  (! this->isStaticGraph (), std::logic_error, "This matrix (the source "
6262  "of the conversion) claims to be fill complete, but does not have a "
6263  "static (i.e., constant) graph. Please report this bug to the Tpetra "
6264  "developers.");
6265 
      // The output matrix is built on the graph returned by
      // getCrsGraph(), so only the values need to be transferred.
6266  RCP<output_matrix_type> newMatrix
6267  (new output_matrix_type (this->getCrsGraph ()));
6268  // Copy old values into new values. impl_scalar_type and T may
6269  // differ, so we can't use Kokkos::deep_copy.
6270  ::Tpetra::Details::copyConvert (newMatrix->lclMatrix_.values,
6271  this->lclMatrix_.values);
6272  // Since newmat has a static (const) graph, the graph already has
6273  // a column Map, and Import and Export objects already exist (if
6274  // applicable). Thus, calling fillComplete is cheap.
6275  newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
6276 
6277  return newMatrix;
6278  }
6279 
6280 
6281  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6282  void
6285  {
      // Debug-only consistency check of the matrix's internal state.
      // The whole body is compiled only when HAVE_TPETRA_DEBUG is
      // defined; otherwise this function does nothing.  Each failed
      // check is reported as std::logic_error.
      //
      // NOTE(review): listing lines 6283-6284 (the class qualifier and
      // the function name) are absent from this view; the name
      // "checkInternalState" is taken from tfecfFuncName below.
6286 #ifdef HAVE_TPETRA_DEBUG
6287  const char tfecfFuncName[] = "checkInternalState: ";
6288  const char err[] = "Internal state is not consistent. "
6289  "Please report this bug to the Tpetra developers.";
6290 
6291  // This version of the graph (RCP<const crs_graph_type>) must
6292  // always be nonnull.
6293  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6294  staticGraph_.is_null (),
6295  std::logic_error, err);
6296  // myGraph == null means that the matrix has a const ("static")
6297  // graph. Otherwise, the matrix has a dynamic graph (it owns its
6298  // graph).
      // If myGraph_ is set, it must be the same object as staticGraph_.
6299  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6300  ! myGraph_.is_null () && myGraph_ != staticGraph_,
6301  std::logic_error, err);
6302  // if matrix is fill complete, then graph must be fill complete
6303  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6304  isFillComplete () && ! staticGraph_->isFillComplete (),
6305  std::logic_error, err << " Specifically, the matrix is fill complete, "
6306  "but its graph is NOT fill complete.");
6307  // if matrix is storage optimized, it should have a 1D allocation
      // (checked here as: no 2-D allocation, i.e. values2D_ is null)
6308  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6309  isStorageOptimized () && ! values2D_.is_null (),
6310  std::logic_error, err);
6311  // if matrix/graph are static profile, then 2D allocation should not be present
6312  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6313  getProfileType() == StaticProfile && values2D_ != Teuchos::null,
6314  std::logic_error, err);
6315  // if matrix/graph are dynamic profile, then 1D allocation should not be present
      // (the test treats every profile other than StaticProfile as dynamic)
6316  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6317  getProfileType() != StaticProfile && k_values1D_.extent (0) > 0,
6318  std::logic_error, err);
6319  // if values are allocated and they are non-zero in number, then
6320  // one of the allocations should be present
6321  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6322  staticGraph_->indicesAreAllocated () &&
6323  staticGraph_->getNodeAllocationSize() > 0 &&
6324  staticGraph_->getNodeNumRows() > 0
6325  && values2D_.is_null () &&
6326  k_values1D_.extent (0) == 0,
6327  std::logic_error, err);
6328  // we cannot have both a 1D and 2D allocation
6329  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6330  k_values1D_.extent (0) > 0 && values2D_ != Teuchos::null,
6331  std::logic_error, err << " Specifically, k_values1D_ is allocated (has "
6332  "size " << k_values1D_.extent (0) << " > 0) and values2D_ is also "
6333  "allocated. CrsMatrix is not suppose to have both a 1-D and a 2-D "
6334  "allocation at the same time.");
6335 #endif
6336  }
6337 
6338  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6339  std::string
6342  {
      // Build and return a one-line, brace-delimited summary of this
      // matrix: the object label (if nonempty), whether the matrix is
      // fill complete, its global dimensions, and -- only when fill
      // complete -- the global number of entries.
      //
      // NOTE(review): listing lines 6340-6341 (class qualifier and
      // function name, presumably "description () const") are absent
      // from this view.
6343  std::ostringstream os;
6344 
6345  os << "Tpetra::CrsMatrix (Kokkos refactor): {";
6346  if (this->getObjectLabel () != "") {
6347  os << "Label: \"" << this->getObjectLabel () << "\", ";
6348  }
6349  if (isFillComplete ()) {
6350  os << "isFillComplete: true"
6351  << ", global dimensions: [" << getGlobalNumRows () << ", "
6352  << getGlobalNumCols () << "]"
6353  << ", global number of entries: " << getGlobalNumEntries ()
6354  << "}";
6355  }
6356  else {
      // The global entry count is not printed in this branch.
6357  os << "isFillComplete: false"
6358  << ", global dimensions: [" << getGlobalNumRows () << ", "
6359  << getGlobalNumCols () << "]}";
6360  }
6361  return os.str ();
6362  }
6363 
6364  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6365  void
6367  describe (Teuchos::FancyOStream &out,
6368  const Teuchos::EVerbosityLevel verbLevel) const
6369  {
      // Print a description of this matrix to `out`, with the amount
      // of detail selected by `verbLevel` (VERB_DEFAULT is treated as
      // VERB_LOW; VERB_NONE prints nothing).
      //
      // For VERB_MEDIUM and above, the per-process sections call
      // comm->barrier() inside loops over all ranks, so every process
      // in the communicator has to reach those loops.
      //
      // NOTE(review): listing line 6366 (presumably the
      // "CrsMatrix<...>::" class qualifier) is absent from this view.
6370  using std::endl;
6371  using std::setw;
6372  using Teuchos::ArrayView;
6373  using Teuchos::Comm;
6374  using Teuchos::RCP;
6375  using Teuchos::TypeNameTraits;
6376  using Teuchos::VERB_DEFAULT;
6377  using Teuchos::VERB_NONE;
6378  using Teuchos::VERB_LOW;
6379  using Teuchos::VERB_MEDIUM;
6380  using Teuchos::VERB_HIGH;
6381  using Teuchos::VERB_EXTREME;
6382 
6383  const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
6384 
6385  if (vl == VERB_NONE) {
6386  return; // Don't print anything at all
6387  }
6388 
6389  // By convention, describe() always begins with a tab.
6390  Teuchos::OSTab tab0 (out);
6391 
6392  RCP<const Comm<int> > comm = this->getComm();
6393  const int myRank = comm->getRank();
6394  const int numProcs = comm->getSize();
      // Column width for the per-row table printed at VERB_HIGH and
      // above: one plus the number of powers of ten (starting at 10)
      // below the global row count, but at least 11, plus 2 of padding.
      // NOTE(review): 11 is presumably the length of the widest column
      // heading ("Num Entries") -- confirm.
6395  size_t width = 1;
6396  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
6397  ++width;
6398  }
6399  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
6400 
6401  // none: print nothing
6402  // low: print O(1) info from node 0
6403  // medium: print O(P) info, num entries per process
6404  // high: print O(N) info, num entries per row
6405  // extreme: print O(NNZ) info: print indices and values
6406  //
6407  // for medium and higher, print constituent objects at specified verbLevel
6408  if (myRank == 0) {
6409  out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
6410  }
6411  Teuchos::OSTab tab1 (out);
6412 
      // Global summary, printed by rank 0 only.
6413  if (myRank == 0) {
6414  if (this->getObjectLabel () != "") {
6415  out << "Label: \"" << this->getObjectLabel () << "\", ";
6416  }
6417  {
6418  out << "Template parameters:" << endl;
6419  Teuchos::OSTab tab2 (out);
6420  out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
6421  << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
6422  << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
6423  << "Node: " << TypeNameTraits<Node>::name () << endl;
6424  }
6425  if (isFillComplete()) {
6426  out << "isFillComplete: true" << endl
6427  << "Global dimensions: [" << getGlobalNumRows () << ", "
6428  << getGlobalNumCols () << "]" << endl
6429  << "Global number of entries: " << getGlobalNumEntries () << endl
6430  << endl << "Global max number of entries in a row: "
6431  << getGlobalMaxNumRowEntries () << endl;
6432  }
6433  else {
6434  out << "isFillComplete: false" << endl
6435  << "Global dimensions: [" << getGlobalNumRows () << ", "
6436  << getGlobalNumCols () << "]" << endl;
6437  }
6438  }
6439 
6440  if (vl < VERB_MEDIUM) {
6441  return; // all done!
6442  }
6443 
      // From here on (VERB_MEDIUM and above): describe each Map.  A
      // Map that is the same RCP as one already covered is reported
      // by reference ("same as ...") rather than described again.
6444  // Describe the row Map.
6445  if (myRank == 0) {
6446  out << endl << "Row Map:" << endl;
6447  }
6448  if (getRowMap ().is_null ()) {
6449  if (myRank == 0) {
6450  out << "null" << endl;
6451  }
6452  }
6453  else {
6454  if (myRank == 0) {
6455  out << endl;
6456  }
6457  getRowMap ()->describe (out, vl);
6458  }
6459 
6460  // Describe the column Map.
6461  if (myRank == 0) {
6462  out << "Column Map: ";
6463  }
6464  if (getColMap ().is_null ()) {
6465  if (myRank == 0) {
6466  out << "null" << endl;
6467  }
6468  } else if (getColMap () == getRowMap ()) {
6469  if (myRank == 0) {
6470  out << "same as row Map" << endl;
6471  }
6472  } else {
6473  if (myRank == 0) {
6474  out << endl;
6475  }
6476  getColMap ()->describe (out, vl);
6477  }
6478 
6479  // Describe the domain Map.
6480  if (myRank == 0) {
6481  out << "Domain Map: ";
6482  }
6483  if (getDomainMap ().is_null ()) {
6484  if (myRank == 0) {
6485  out << "null" << endl;
6486  }
6487  } else if (getDomainMap () == getRowMap ()) {
6488  if (myRank == 0) {
6489  out << "same as row Map" << endl;
6490  }
6491  } else if (getDomainMap () == getColMap ()) {
6492  if (myRank == 0) {
6493  out << "same as column Map" << endl;
6494  }
6495  } else {
6496  if (myRank == 0) {
6497  out << endl;
6498  }
6499  getDomainMap ()->describe (out, vl);
6500  }
6501 
6502  // Describe the range Map.
6503  if (myRank == 0) {
6504  out << "Range Map: ";
6505  }
6506  if (getRangeMap ().is_null ()) {
6507  if (myRank == 0) {
6508  out << "null" << endl;
6509  }
6510  } else if (getRangeMap () == getDomainMap ()) {
6511  if (myRank == 0) {
6512  out << "same as domain Map" << endl;
6513  }
6514  } else if (getRangeMap () == getRowMap ()) {
6515  if (myRank == 0) {
6516  out << "same as row Map" << endl;
6517  }
6518  } else {
6519  if (myRank == 0) {
6520  out << endl;
6521  }
6522  getRangeMap ()->describe (out, vl);
6523  }
6524 
6525  // O(P) data
      // Ranks take turns: in iteration curRank only that rank prints,
      // then all ranks hit the barriers before the next iteration.
6526  for (int curRank = 0; curRank < numProcs; ++curRank) {
6527  if (myRank == curRank) {
6528  out << "Process rank: " << curRank << endl;
6529  Teuchos::OSTab tab2 (out);
6530  if (! staticGraph_->indicesAreAllocated ()) {
6531  out << "Graph indices not allocated" << endl;
6532  }
6533  else {
6534  out << "Number of allocated entries: "
6535  << staticGraph_->getNodeAllocationSize () << endl;
6536  }
6537  out << "Number of entries: " << getNodeNumEntries () << endl
6538  << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
6539  << endl;
6540  }
6541  // Give output time to complete by executing some barriers.
6542  comm->barrier ();
6543  comm->barrier ();
6544  comm->barrier ();
6545  }
6546 
6547  if (vl < VERB_HIGH) {
6548  return; // all done!
6549  }
6550 
6551  // O(N) and O(NNZ) data
      // Per-row table, again one rank at a time.  At VERB_EXTREME each
      // row is followed by its (global column index, value) pairs.
6552  for (int curRank = 0; curRank < numProcs; ++curRank) {
6553  if (myRank == curRank) {
6554  out << std::setw(width) << "Proc Rank"
6555  << std::setw(width) << "Global Row"
6556  << std::setw(width) << "Num Entries";
6557  if (vl == VERB_EXTREME) {
6558  out << std::setw(width) << "(Index,Value)";
6559  }
6560  out << endl;
6561  for (size_t r = 0; r < getNodeNumRows (); ++r) {
6562  const size_t nE = getNumEntriesInLocalRow(r);
6563  GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
6564  out << std::setw(width) << myRank
6565  << std::setw(width) << gid
6566  << std::setw(width) << nE;
6567  if (vl == VERB_EXTREME) {
6568  if (isGloballyIndexed()) {
6569  ArrayView<const GlobalOrdinal> rowinds;
6570  ArrayView<const Scalar> rowvals;
6571  getGlobalRowView (gid, rowinds, rowvals);
6572  for (size_t j = 0; j < nE; ++j) {
6573  out << " (" << rowinds[j]
6574  << ", " << rowvals[j]
6575  << ") ";
6576  }
6577  }
6578  else if (isLocallyIndexed()) {
6579  ArrayView<const LocalOrdinal> rowinds;
6580  ArrayView<const Scalar> rowvals;
6581  getLocalRowView (r, rowinds, rowvals);
      // Local column indices are translated to global ones through
      // the column Map before printing.
6582  for (size_t j=0; j < nE; ++j) {
6583  out << " (" << getColMap()->getGlobalElement(rowinds[j])
6584  << ", " << rowvals[j]
6585  << ") ";
6586  }
6587  } // globally or locally indexed
6588  } // vl == VERB_EXTREME
6589  out << endl;
6590  } // for each row r on this process
6591  } // if (myRank == curRank)
6592 
6593  // Give output time to complete
6594  comm->barrier ();
6595  comm->barrier ();
6596  comm->barrier ();
6597  } // for each process p
6598  }
6599 
6600  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6601  bool
6604  {
      // Return true exactly when `source` can be dynamic_cast to
      // row_matrix_type; no size comparison is performed.
      //
      // NOTE(review): listing lines 6602-6603 (class qualifier, name
      // and parameter list -- presumably checkSizes taking a
      // SrcDistObject reference named `source`) and line 6612
      // (presumably the row_matrix_type alias) are absent from this
      // view; confirm against the full file.
6605  // It's not clear what kind of compatibility checks on sizes can
6606  // be performed here. Epetra_CrsGraph doesn't check any sizes for
6607  // compatibility.
6608 
6609  // Currently, the source object must be a RowMatrix with the same
6610  // four template parameters as the target CrsMatrix. We might
6611  // relax this requirement later.
6613  const row_matrix_type* srcRowMat =
6614  dynamic_cast<const row_matrix_type*> (&source);
6615  return (srcRowMat != NULL);
6616  }
6617 
6618  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6619  void
6622  const size_t numSameIDs,
6623  const LocalOrdinal permuteToLIDs[],
6624  const LocalOrdinal permuteFromLIDs[],
6625  const size_t numPermutes)
6626  {
      // Copy rows of the source matrix `srcMat` into this matrix.
      //
      //  - Source local rows [0, numSameIDs) are combined into the row
      //    of this matrix that has the same global index.
      //  - For p in [0, numPermutes), source local row
      //    permuteFromLIDs[p] is combined into this matrix's local row
      //    permuteToLIDs[p] (both converted to global indices first).
      //
      // Rows are combined with REPLACE when this matrix has a static
      // graph and with INSERT otherwise.
      //
      // NOTE(review): listing lines 6620-6621 (class qualifier,
      // function name and the `srcMat` parameter) and line 6627 are
      // absent from this view; the name is taken from tfecfFuncName
      // and the ProfilingRegion label below.
6628  using Teuchos::Array;
6629  using Teuchos::ArrayView;
6630  typedef LocalOrdinal LO;
6631  typedef GlobalOrdinal GO;
6632 #ifdef HAVE_TPETRA_DEBUG
6633  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
6634  const char tfecfFuncName[] = "copyAndPermuteImpl: ";
6635 #endif // HAVE_TPETRA_DEBUG
6636 
6637  ProfilingRegion regionCAP ("Tpetra::CrsMatrix::copyAndPermuteImpl");
6638 
6639  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6640  //
6641  // Copy the first numSame row from source to target (this matrix).
6642  // This involves copying rows corresponding to LIDs [0, numSame-1].
6643  //
6644  const map_type& srcRowMap = * (srcMat.getRowMap ());
      // Scratch buffers shared by both loops; they only ever grow.
6645  Array<GO> rowInds;
6646  Array<Scalar> rowVals;
6647  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
6648  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6649  // Global ID for the current row index in the source matrix.
6650  // The first numSameIDs GIDs in the two input lists are the
6651  // same, so sourceGID == targetGID in this case.
6652  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6653  const GO targetGID = sourceGID;
6654 
6655  // Input views for the combineGlobalValues() call below.
6656  ArrayView<const GO> rowIndsConstView;
6657  ArrayView<const Scalar> rowValsConstView;
6658 
6659  if (sourceIsLocallyIndexed) {
6660  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6661  if (rowLength > static_cast<size_t> (rowInds.size())) {
6662  rowInds.resize (rowLength);
6663  rowVals.resize (rowLength);
6664  }
6665  // Resizing invalidates an Array's views, so we must make new
6666  // ones, even if rowLength hasn't changed.
6667  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6668  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6669 
6670  // The source matrix is locally indexed, so we have to get a
6671  // copy. Really it's the GIDs that have to be copied (because
6672  // they have to be converted from LIDs).
6673  size_t checkRowLength = 0;
6674  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
6675 
6676 #ifdef HAVE_TPETRA_DEBUG
6677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
6678  std::logic_error, "For global row index " << sourceGID << ", the source"
6679  " matrix's getNumEntriesInGlobalRow() method returns a row length of "
6680  << rowLength << ", but the getGlobalRowCopy() method reports that "
6681  "the row length is " << checkRowLength << ". Please report this bug "
6682  "to the Tpetra developers.");
6683 #endif // HAVE_TPETRA_DEBUG
6684 
6685  rowIndsConstView = rowIndsView.view (0, rowLength);
6686  rowValsConstView = rowValsView.view (0, rowLength);
6687  }
6688  else { // source matrix is globally indexed.
6689  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
6690  }
6691 
6692  // Combine the data into the target matrix.
6693  if (this->isStaticGraph ()) {
6694  // Applying a permutation to a matrix with a static graph
6695  // means REPLACE-ing entries.
6696  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE);
6697  }
6698  else {
6699  // Applying a permutation to a matrix with a dynamic graph
6700  // means INSERT-ing entries. This has the same effect as
6701  // ADD, if the target graph already has an entry there.
6702  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT);
6703  }
6704  } // For each of the consecutive source and target IDs that are the same
6705 
6706  //
6707  // Permute the remaining rows.
6708  //
      // Same per-row procedure as above, except that the source and
      // target rows come from the two permutation arrays and may have
      // different global indices.
6709  const map_type& tgtRowMap = * (this->getRowMap ());
6710  for (size_t p = 0; p < numPermutes; ++p) {
6711  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6712  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6713 
6714  // Input views for the combineGlobalValues() call below.
6715  ArrayView<const GO> rowIndsConstView;
6716  ArrayView<const Scalar> rowValsConstView;
6717 
6718  if (sourceIsLocallyIndexed) {
6719  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6720  if (rowLength > static_cast<size_t> (rowInds.size ())) {
6721  rowInds.resize (rowLength);
6722  rowVals.resize (rowLength);
6723  }
6724  // Resizing invalidates an Array's views, so we must make new
6725  // ones, even if rowLength hasn't changed.
6726  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6727  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6728 
6729  // The source matrix is locally indexed, so we have to get a
6730  // copy. Really it's the GIDs that have to be copied (because
6731  // they have to be converted from LIDs).
6732  size_t checkRowLength = 0;
6733  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
6734 
6735 #ifdef HAVE_TPETRA_DEBUG
6736  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
6737  std::logic_error, "For the source matrix's global row index "
6738  << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() "
6739  "method returns a row length of " << rowLength << ", but the "
6740  "getGlobalRowCopy() method reports that the row length is "
6741  << checkRowLength << ". Please report this bug to the Tpetra "
6742  "developers.");
6743 #endif // HAVE_TPETRA_DEBUG
6744 
6745  rowIndsConstView = rowIndsView.view (0, rowLength);
6746  rowValsConstView = rowValsView.view (0, rowLength);
6747  }
6748  else {
6749  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
6750  }
6751 
6752  // Combine the data into the target matrix.
6753  if (isStaticGraph()) {
6754  this->combineGlobalValues (targetGID, rowIndsConstView,
6755  rowValsConstView, REPLACE);
6756  }
6757  else {
6758  this->combineGlobalValues (targetGID, rowIndsConstView,
6759  rowValsConstView, INSERT);
6760  }
6761  } // For each ID to permute
6762  }
6763 
6764  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6765  void
6766  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
      // The method is named copyAndPermuteNew when
      // TPETRA_ENABLE_DEPRECATED_CODE is defined, copyAndPermute
      // otherwise; the body is the same either way.
6767 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
6768  copyAndPermuteNew
6769 #else // TPETRA_ENABLE_DEPRECATED_CODE
6770  copyAndPermute
6771 #endif // TPETRA_ENABLE_DEPRECATED_CODE
6772  (const SrcDistObject& srcObj,
6773  const size_t numSameIDs,
6774  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6775  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs)
6776  {
      // DualView front end for copyAndPermuteImpl(): checks that both
      // permutation lists have the same length (std::invalid_argument
      // otherwise), asserts that neither needs a sync to host, casts
      // srcObj to RMT, and forwards the host pointers of the two lists.
      // When Behavior::verbose() is true, progress lines are written
      // to std::cerr.
      //
      // NOTE(review): listing lines 6777-6778 and 6824 are absent from
      // this view; presumably they held the using-declarations for
      // dualViewStatusToString / ProfilingRegion and the definition of
      // the RMT alias -- confirm against the full file.
6779  using std::endl;
6780 
6781  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
6782  const char tfecfFuncName[] = "copyAndPermute: ";
6783  ProfilingRegion regionCAP ("Tpetra::CrsMatrix::copyAndPermute");
6784 
6785  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
      // `prefix` is only allocated in verbose mode; every later
      // dereference is guarded by `if (verbose)`.
6786  std::unique_ptr<std::string> prefix;
6787  if (verbose) {
      // Rank stays -1 if the Map or its communicator is null.
6788  int myRank = -1;
6789  auto map = this->getMap ();
6790  if (! map.is_null ()) {
6791  auto comm = map->getComm ();
6792  if (! comm.is_null ()) {
6793  myRank = comm->getRank ();
6794  }
6795  }
6796  prefix = [myRank] () {
6797  std::ostringstream pfxStrm;
6798  pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::copyAndPermute: ";
6799  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
6800  } ();
      // Assemble the whole message first so that it reaches std::cerr
      // through a single insertion.
6801  std::ostringstream os;
6802  os << *prefix << endl
6803  << *prefix << " "
6804  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs") << endl
6805  << *prefix << " "
6806  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs") << endl;
6807  std::cerr << os.str ();
6808  }
6809 
6810  const auto numPermute = permuteToLIDs.extent (0);
6811  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6812  (numPermute != permuteFromLIDs.extent (0),
6813  std::invalid_argument, "permuteToLIDs.extent(0) = "
6814  << numPermute << "!= permuteFromLIDs.extent(0) = "
6815  << permuteFromLIDs.extent (0) << ".");
6816 
      // The implementation reads the lists on host, so their host
      // copies must already be up to date.
6817  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6818  auto permuteToLIDs_h = permuteToLIDs.view_host ();
6819  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6820  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6821 
6822  // This dynamic cast should succeed, because we've already tested
6823  // it in checkSizes().
6825  const RMT& srcMat = dynamic_cast<const RMT&> (srcObj);
6826 
6827  if (verbose) {
6828  std::ostringstream os;
6829  os << *prefix << "Call copyAndPermuteImpl" << endl;
6830  std::cerr << os.str ();
6831  }
6832  this->copyAndPermuteImpl (srcMat, numSameIDs, permuteToLIDs_h.data (),
6833  permuteFromLIDs_h.data (), numPermute);
6834  }
6835 
6836  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6837  void
6838  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6839 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
6840  packAndPrepareNew
6841 #else // TPETRA_ENABLE_DEPRECATED_CODE
6842  packAndPrepare
6843 #endif // TPETRA_ENABLE_DEPRECATED_CODE
6844  (const SrcDistObject& source,
6845  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6846  Kokkos::DualView<char*, buffer_device_type>& exports,
6847  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6848  size_t& constantNumPackets,
6849  Distributor& distor)
6850  {
6853  using Teuchos::outArg;
6854  using Teuchos::REDUCE_MAX;
6855  using Teuchos::reduceAll;
6856  using std::endl;
6857  typedef LocalOrdinal LO;
6858  typedef GlobalOrdinal GO;
6859  const char tfecfFuncName[] = "packAndPrepare: ";
6860  ProfilingRegion regionPAP ("Tpetra::CrsMatrix::packAndPrepare");
6861 
6862  const bool debug = ::Tpetra::Details::Behavior::debug ();
6863  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
6864 
6865  // Processes on which the communicator is null should not participate.
6866  Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6867  if (pComm.is_null ()) {
6868  return;
6869  }
6870  const Teuchos::Comm<int>& comm = *pComm;
6871  const int myRank = comm.getSize ();
6872 
6873  std::unique_ptr<std::string> prefix;
6874  if (verbose) {
6875  prefix = [myRank] () {
6876  std::ostringstream pfxStrm;
6877  pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::packAndPrepare: ";
6878  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
6879  } ();
6880  std::ostringstream os;
6881  os << *prefix << "Start" << endl
6882  << *prefix << " "
6883  << dualViewStatusToString (exportLIDs, "exportLIDs")
6884  << endl
6885  << *prefix << " "
6886  << dualViewStatusToString (exports, "exports")
6887  << endl
6888  << *prefix << " "
6889  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
6890  << endl;
6891  std::cerr << os.str ();
6892  }
6893 
6894  // Attempt to cast the source object to CrsMatrix. If successful,
6895  // use the source object's packNew() method to pack its data for
6896  // communication. Otherwise, attempt to cast to RowMatrix; if
6897  // successful, use the source object's pack() method. Otherwise,
6898  // the source object doesn't have the right type.
6899  //
6900  // FIXME (mfh 30 Jun 2013, 11 Sep 2017) We don't even need the
6901  // RowMatrix to have the same Node type. Unfortunately, we don't
6902  // have a way to ask if the RowMatrix is "a RowMatrix with any
6903  // Node type," since RowMatrix doesn't have a base class. A
6904  // hypothetical RowMatrixBase<Scalar, LO, GO> class, which does
6905  // not currently exist, would satisfy this requirement.
6906  //
6907  // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
6908  // type doesn't technically need to match the target object's
6909  // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
6910  // and GO need not be the same, as long as there is no overflow of
6911  // the indices. However, checking for index overflow is global
6912  // and therefore undesirable.
6913 
6914  std::ostringstream msg; // for collecting error messages
6915  int lclBad = 0; // to be set below
6916 
6917  using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6918  const crs_matrix_type* srcCrsMat =
6919  dynamic_cast<const crs_matrix_type*> (&source);
6920  if (srcCrsMat != nullptr) {
6921  if (verbose) {
6922  std::ostringstream os;
6923  os << *prefix << "Source matrix same (CrsMatrix) type as target; "
6924  "calling packNew" << endl;
6925  std::cerr << os.str ();
6926  }
6927  try {
6928  srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6929  constantNumPackets, distor);
6930  }
6931  catch (std::exception& e) {
6932  lclBad = 1;
6933  msg << "Proc " << myRank << ": " << e.what () << std::endl;
6934  }
6935  }
6936  else {
6937  using Kokkos::HostSpace;
6938  using Kokkos::subview;
6939  using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6940  using range_type = Kokkos::pair<size_t, size_t>;
6941 
6942  if (verbose) {
6943  std::ostringstream os;
6944  os << *prefix << "Source matrix NOT same (CrsMatrix) type as target"
6945  << endl;
6946  std::cerr << os.str ();
6947  }
6948 
6949  using row_matrix_type = RowMatrix<Scalar, LO, GO, Node>;
6950  const row_matrix_type* srcRowMat =
6951  dynamic_cast<const row_matrix_type*> (&source);
6952  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6953  (srcRowMat == nullptr, std::invalid_argument,
6954  "The source object of the Import or Export operation is neither a "
6955  "CrsMatrix (with the same template parameters as the target object), "
6956  "nor a RowMatrix (with the same first four template parameters as the "
6957  "target object).");
6958 
6959  // For the RowMatrix case, we need to convert from
6960  // Kokkos::DualView to Teuchos::Array*. This doesn't need to be
6961  // so terribly efficient, since packing a non-CrsMatrix
6962  // RowMatrix for Import/Export into a CrsMatrix is not a
6963  // critical case. Thus, we may allocate Teuchos::Array objects
6964  // here and copy to and from Kokkos::*View.
6965 
6966  // View exportLIDs's host data as a Teuchos::ArrayView.
6967  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6968  auto exportLIDs_h = exportLIDs.view_host ();
6969  Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6970  exportLIDs_h.size ());
6971 
6972  // pack() will allocate exports_a as needed. We'll copy back
6973  // into exports (after (re)allocating exports if needed) below.
6974  Teuchos::Array<char> exports_a;
6975 
6976  // View exportLIDs' host data as a Teuchos::ArrayView. We don't
6977  // need to sync, since we're doing write-only access, but we do
6978  // need to mark the DualView as modified on host.
6979 
6980  numPacketsPerLID.clear_sync_state (); // write-only access
6981  numPacketsPerLID.modify_host ();
6982  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6983  Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6984  numPacketsPerLID_h.size ());
6985 
6986  // Invoke RowMatrix's legacy pack() interface, using above
6987  // Teuchos::Array* objects.
6988  try {
6989  srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6990  constantNumPackets, distor);
6991  }
6992  catch (std::exception& e) {
6993  lclBad = 1;
6994  msg << "Proc " << myRank << ": " << e.what () << std::endl;
6995  }
6996 
6997  // Allocate 'exports', and copy exports_a back into it.
6998  const size_t newAllocSize = static_cast<size_t> (exports_a.size ());
6999  if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
7000  const std::string oldLabel = exports.d_view.label ();
7001  const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
7002  exports = exports_type (newLabel, newAllocSize);
7003  }
7004  // It's safe to assume that we're working on host anyway, so
7005  // just keep exports sync'd to host.
7006  // ignore current device contents
7007  exports.modify_host();
7008 
7009  auto exports_h = exports.view_host ();
7010  auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
7011 
7012  // Kokkos::deep_copy needs a Kokkos::View input, so turn
7013  // exports_a into a nonowning Kokkos::View first before copying.
7014  typedef typename exports_type::t_host::execution_space HES;
7015  typedef Kokkos::Device<HES, HostSpace> host_device_type;
7016  Kokkos::View<const char*, host_device_type>
7017  exports_a_kv (exports_a.getRawPtr (), newAllocSize);
7018  Kokkos::deep_copy (exports_h_sub, exports_a_kv);
7019  }
7020 
7021  if (debug) {
7022  int gblBad = 0; // output argument; to be set below
7023  reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
7024  if (gblBad != 0) {
7025  Tpetra::Details::gathervPrint (std::cerr, msg.str (), comm);
7026  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7027  (true, std::logic_error, "packNew() or pack() threw an exception on "
7028  "one or more participating processes.");
7029  }
7030  }
7031  else {
7032  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7033  (lclBad != 0, std::logic_error, "packNew threw an exception on one "
7034  "or more participating processes. Here is this process' error "
7035  "message: " << msg.str ());
7036  }
7037 
7038  if (verbose) {
7039  std::ostringstream os;
7040  os << *prefix << "packAndPrepare: Done!" << endl
7041  << *prefix << " "
7042  << dualViewStatusToString (exportLIDs, "exportLIDs")
7043  << endl
7044  << *prefix << " "
7045  << dualViewStatusToString (exports, "exports")
7046  << endl
7047  << *prefix << " "
7048  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7049  << endl;
7050  std::cerr << os.str ();
7051  }
7052  }
7053 
7054  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7055  size_t
7056  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7057  packRow (char exports[],
7058  const size_t offset,
7059  const size_t numEnt,
7060  const GlobalOrdinal gidsIn[],
7061  const impl_scalar_type valsIn[],
7062  const size_t numBytesPerValue) const
7063  {
7064  using Kokkos::View;
7065  using Kokkos::subview;
7067  typedef LocalOrdinal LO;
7068  typedef GlobalOrdinal GO;
7069  typedef impl_scalar_type ST;
7070  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
7071 
7072  if (numEnt == 0) {
7073  // Empty rows always take zero bytes, to ensure sparsity.
7074  return 0;
7075  }
7076 
7077  const GO gid = 0; // packValueCount wants this
7078  const LO numEntLO = static_cast<size_t> (numEnt);
7079 
7080  const size_t numEntBeg = offset;
7081  const size_t numEntLen = PackTraits<LO, HES>::packValueCount (numEntLO);
7082  const size_t gidsBeg = numEntBeg + numEntLen;
7083  const size_t gidsLen = numEnt * PackTraits<GO, HES>::packValueCount (gid);
7084  const size_t valsBeg = gidsBeg + gidsLen;
7085  const size_t valsLen = numEnt * numBytesPerValue;
7086 
7087  char* const numEntOut = exports + numEntBeg;
7088  char* const gidsOut = exports + gidsBeg;
7089  char* const valsOut = exports + valsBeg;
7090 
7091  size_t numBytesOut = 0;
7092  int errorCode = 0;
7093  numBytesOut += PackTraits<LO, HES>::packValue (numEntOut, numEntLO);
7094 
7095  {
7096  Kokkos::pair<int, size_t> p;
7097  p = PackTraits<GO, HES>::packArray (gidsOut, gidsIn, numEnt);
7098  errorCode += p.first;
7099  numBytesOut += p.second;
7100 
7101  p = PackTraits<ST, HES>::packArray (valsOut, valsIn, numEnt);
7102  errorCode += p.first;
7103  numBytesOut += p.second;
7104  }
7105 
7106  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
7107  TEUCHOS_TEST_FOR_EXCEPTION
7108  (numBytesOut != expectedNumBytes, std::logic_error, "packRow: "
7109  "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
7110  << expectedNumBytes << ".");
7111  TEUCHOS_TEST_FOR_EXCEPTION
7112  (errorCode != 0, std::runtime_error, "packRow: "
7113  "PackTraits::packArray returned a nonzero error code");
7114 
7115  return numBytesOut;
7116  }
7117 
7118  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7119  size_t
7120  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7121  unpackRow (GlobalOrdinal gidsOut[],
7122  impl_scalar_type valsOut[],
7123  const char imports[],
7124  const size_t offset,
7125  const size_t numBytes,
7126  const size_t numEnt,
7127  const size_t numBytesPerValue)
7128  {
7129  using Kokkos::View;
7130  using Kokkos::subview;
7132  typedef LocalOrdinal LO;
7133  typedef GlobalOrdinal GO;
7134  typedef impl_scalar_type ST;
7135  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
7136 
7137  if (numBytes == 0) {
7138  // Rows with zero bytes should always have zero entries.
7139  if (numEnt != 0) {
7140  const int myRank = this->getMap ()->getComm ()->getRank ();
7141  TEUCHOS_TEST_FOR_EXCEPTION
7142  (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
7143  "unpackRow: The number of bytes to unpack numBytes=0, but the "
7144  "number of entries to unpack (as reported by numPacketsPerLID) "
7145  "for this row numEnt=" << numEnt << " != 0.");
7146  }
7147  return 0;
7148  }
7149 
7150  if (numEnt == 0 && numBytes != 0) {
7151  const int myRank = this->getMap ()->getComm ()->getRank ();
7152  TEUCHOS_TEST_FOR_EXCEPTION
7153  (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
7154  "unpackRow: The number of entries to unpack (as reported by "
7155  "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
7156  "numBytes=" << numBytes << " != 0.");
7157  }
7158 
7159  const GO gid = 0; // packValueCount wants this
7160  const LO lid = 0; // packValueCount wants this
7161 
7162  const size_t numEntBeg = offset;
7163  const size_t numEntLen = PackTraits<LO, HES>::packValueCount (lid);
7164  const size_t gidsBeg = numEntBeg + numEntLen;
7165  const size_t gidsLen = numEnt * PackTraits<GO, HES>::packValueCount (gid);
7166  const size_t valsBeg = gidsBeg + gidsLen;
7167  const size_t valsLen = numEnt * numBytesPerValue;
7168 
7169  const char* const numEntIn = imports + numEntBeg;
7170  const char* const gidsIn = imports + gidsBeg;
7171  const char* const valsIn = imports + valsBeg;
7172 
7173  size_t numBytesOut = 0;
7174  int errorCode = 0;
7175  LO numEntOut;
7176  numBytesOut += PackTraits<LO, HES>::unpackValue (numEntOut, numEntIn);
7177  if (static_cast<size_t> (numEntOut) != numEnt ||
7178  numEntOut == static_cast<LO> (0)) {
7179  const int myRank = this->getMap ()->getComm ()->getRank ();
7180  std::ostringstream os;
7181  os << "(Proc " << myRank << ") CrsMatrix::unpackRow: ";
7182  bool firstErrorCondition = false;
7183  if (static_cast<size_t> (numEntOut) != numEnt) {
7184  os << "Number of entries from numPacketsPerLID numEnt=" << numEnt
7185  << " does not equal number of entries unpacked from imports "
7186  "buffer numEntOut=" << numEntOut << ".";
7187  firstErrorCondition = true;
7188  }
7189  if (numEntOut == static_cast<LO> (0)) {
7190  if (firstErrorCondition) {
7191  os << " Also, ";
7192  }
7193  os << "Number of entries unpacked from imports buffer numEntOut=0, "
7194  "but number of bytes to unpack for this row numBytes=" << numBytes
7195  << " != 0. This should never happen, since packRow should only "
7196  "ever pack rows with a nonzero number of entries. In this case, "
7197  "the number of entries from numPacketsPerLID is numEnt=" << numEnt
7198  << ".";
7199  }
7200  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, os.str ());
7201  }
7202 
7203  {
7204  Kokkos::pair<int, size_t> p;
7205  p = PackTraits<GO, HES>::unpackArray (gidsOut, gidsIn, numEnt);
7206  errorCode += p.first;
7207  numBytesOut += p.second;
7208 
7209  p = PackTraits<ST, HES>::unpackArray (valsOut, valsIn, numEnt);
7210  errorCode += p.first;
7211  numBytesOut += p.second;
7212  }
7213 
7214  TEUCHOS_TEST_FOR_EXCEPTION
7215  (numBytesOut != numBytes, std::logic_error, "unpackRow: numBytesOut = "
7216  << numBytesOut << " != numBytes = " << numBytes << ".");
7217 
7218  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
7219  TEUCHOS_TEST_FOR_EXCEPTION
7220  (numBytesOut != expectedNumBytes, std::logic_error, "unpackRow: "
7221  "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
7222  << expectedNumBytes << ".");
7223 
7224  TEUCHOS_TEST_FOR_EXCEPTION
7225  (errorCode != 0, std::runtime_error, "unpackRow: "
7226  "PackTraits::unpackArray returned a nonzero error code");
7227 
7228  return numBytesOut;
7229  }
7230 
7231  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7232  void
7233  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7234  allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
7235  size_t& totalNumEntries,
7236  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs) const
7237  {
7239  using std::endl;
7240  typedef impl_scalar_type IST;
7241  typedef LocalOrdinal LO;
7242  typedef GlobalOrdinal GO;
7243  //const char tfecfFuncName[] = "allocatePackSpaceNew: ";
7244 
7245  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
7246  // output to std::cerr on every MPI process. This is unwise for
7247  // runs with large numbers of MPI processes.
7248  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
7249  std::unique_ptr<std::string> prefix;
7250  if (verbose) {
7251  int myRank = 0;
7252  auto map = this->getMap ();
7253  if (! map.is_null ()) {
7254  auto comm = map->getComm ();
7255  if (! comm.is_null ()) {
7256  myRank = comm->getRank ();
7257  }
7258  }
7259  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
7260  prefix = [myRank] () {
7261  std::ostringstream pfxStrm;
7262  pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::allocatePackSpaceNew: ";
7263  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7264  } ();
7265 
7266  std::ostringstream os;
7267  os << *prefix << "Before:"
7268  << endl
7269  << *prefix << " "
7270  << dualViewStatusToString (exports, "exports")
7271  << endl
7272  << *prefix << " "
7273  << dualViewStatusToString (exportLIDs, "exportLIDs")
7274  << endl;
7275  std::cerr << os.str ();
7276  }
7277 
7278  // The number of export LIDs must fit in LocalOrdinal, assuming
7279  // that the LIDs are distinct and valid on the calling process.
7280  const LO numExportLIDs = static_cast<LO> (exportLIDs.extent (0));
7281 
7282  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
7283  auto exportLIDs_h = exportLIDs.view_host ();
7284 
7285  // Count the total number of matrix entries to send.
7286  totalNumEntries = 0;
7287  for (LO i = 0; i < numExportLIDs; ++i) {
7288  const LO lclRow = exportLIDs_h[i];
7289  size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
7290  // FIXME (mfh 25 Jan 2015) We should actually report invalid row
7291  // indices as an error. Just consider them nonowned for now.
7292  if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
7293  curNumEntries = 0;
7294  }
7295  totalNumEntries += curNumEntries;
7296  }
7297 
7298  // FIXME (mfh 24 Feb 2013, 24 Mar 2017) This code is only correct
7299  // if sizeof(IST) is a meaningful representation of the amount of
7300  // data in a Scalar instance. (LO and GO are always built-in
7301  // integer types.)
7302  //
7303  // Allocate the exports array. It does NOT need padding for
7304  // alignment, since we use memcpy to write to / read from send /
7305  // receive buffers.
7306  const size_t allocSize =
7307  static_cast<size_t> (numExportLIDs) * sizeof (LO) +
7308  totalNumEntries * (sizeof (IST) + sizeof (GO));
7309  if (static_cast<size_t> (exports.extent (0)) < allocSize) {
7310  using exports_type = Kokkos::DualView<char*, buffer_device_type>;
7311 
7312  const std::string oldLabel = exports.d_view.label ();
7313  const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
7314  exports = exports_type (newLabel, allocSize);
7315  }
7316 
7317  if (verbose) {
7318  std::ostringstream os;
7319  os << *prefix << "After:"
7320  << endl
7321  << *prefix << " "
7322  << dualViewStatusToString (exports, "exports")
7323  << endl
7324  << *prefix << " "
7325  << dualViewStatusToString (exportLIDs, "exportLIDs")
7326  << endl;
7327  std::cerr << os.str ();
7328  }
7329  }
7330 
7331  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7332  void
7334  packNew (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
7335  Kokkos::DualView<char*, buffer_device_type>& exports,
7336  const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7337  size_t& constantNumPackets,
7338  Distributor& dist) const
7339  {
7340  // The call to packNew in packAndPrepare catches and handles any exceptions.
7341  if (this->isStaticGraph ()) {
7342  using ::Tpetra::Details::packCrsMatrixNew;
7343  packCrsMatrixNew (*this, exports, numPacketsPerLID, exportLIDs,
7344  constantNumPackets, dist);
7345  }
7346  else {
7347  this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
7348  constantNumPackets, dist);
7349  }
7350  }
7351 
7352  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7353  void
7355  packNonStaticNew (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
7356  Kokkos::DualView<char*, buffer_device_type>& exports,
7357  const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7358  size_t& constantNumPackets,
7359  Distributor& /* distor */) const
7360  {
7361  using Kokkos::View;
7365  using std::endl;
7366  typedef LocalOrdinal LO;
7367  typedef GlobalOrdinal GO;
7368  typedef impl_scalar_type ST;
7369  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
7370  const char tfecfFuncName[] = "packNonStaticNew: ";
7371 
7372  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
7373  // output to std::cerr on every MPI process. This is unwise for
7374  // runs with large numbers of MPI processes.
7375  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
7376  std::unique_ptr<std::string> prefix;
7377  if (verbose) {
7378  int myRank = 0;
7379  auto map = this->getMap ();
7380  if (! map.is_null ()) {
7381  auto comm = map->getComm ();
7382  if (! comm.is_null ()) {
7383  myRank = comm->getRank ();
7384  }
7385  }
7386  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
7387  prefix = [myRank] () {
7388  std::ostringstream pfxStrm;
7389  pfxStrm << "(Proc " << myRank << ") ";
7390  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7391  } ();
7392 
7393  std::ostringstream os;
7394  os << *prefix << "Tpetra::CrsMatrix::packNonStaticNew:" << endl;
7395  std::cerr << os.str ();
7396  }
7397 
7398  const size_t numExportLIDs = static_cast<size_t> (exportLIDs.extent (0));
7399  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7400  (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
7401  std::invalid_argument, "exportLIDs.size() = " << numExportLIDs
7402  << " != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
7403  << ".");
7404 
7405  // Setting this to zero tells the caller to expect a possibly
7406  // different ("nonconstant") number of packets per local index
7407  // (i.e., a possibly different number of entries per row).
7408  constantNumPackets = 0;
7409 
7410  // The pack buffer 'exports' enters this method possibly
7411  // unallocated. Do the first two parts of "Count, allocate, fill,
7412  // compute."
7413  size_t totalNumEntries = 0;
7414  this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
7415  const size_t bufSize = static_cast<size_t> (exports.extent (0));
7416 
7417  // Write-only host access
7418  exports.clear_sync_state();
7419  exports.modify_host();
7420  auto exports_h = exports.view_host ();
7421  if (verbose) {
7422  std::ostringstream os;
7423  os << *prefix << "After marking exports as modified on host, "
7424  << dualViewStatusToString (exports, "exports") << endl;
7425  std::cerr << os.str ();
7426  }
7427 
7428  // Read-only host access
7429  auto exportLIDs_h = exportLIDs.view_host ();
7430 
7431  // Write-only host access
7432  const_cast<Kokkos::DualView<size_t*, buffer_device_type>*>(&numPacketsPerLID)->clear_sync_state();
7433  const_cast<Kokkos::DualView<size_t*, buffer_device_type>*>(&numPacketsPerLID)->modify_host();
7434  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
7435 
7436  // Compute the number of "packets" (in this case, bytes) per
7437  // export LID (in this case, local index of the row to send), and
7438  // actually pack the data.
7439  size_t offset = 0; // current index into 'exports' array.
7440  for (size_t i = 0; i < numExportLIDs; ++i) {
7441  const LO lclRow = exportLIDs_h[i];
7442 
7443  size_t numEnt;
7444  numEnt = this->getNumEntriesInLocalRow (lclRow);
7445 
7446  // Only pack this row's data if it has a nonzero number of
7447  // entries. We can do this because receiving processes get the
7448  // number of packets, and will know that zero packets means zero
7449  // entries.
7450  if (numEnt == 0) {
7451  numPacketsPerLID_h[i] = 0;
7452  continue;
7453  }
7454 
7455  // Temporary buffer for global column indices.
7456  View<GO*, HES> gidsIn_k;
7457  {
7458  GO gid = 0;
7459  gidsIn_k = PackTraits<GO, HES>::allocateArray(gid, numEnt, "gids");
7460  }
7461 
7462  Teuchos::ArrayView<const Scalar> valsIn;
7463  if (this->isLocallyIndexed ()) {
7464  // If the matrix is locally indexed on the calling process, we
7465  // have to use its column Map (which it _must_ have in this
7466  // case) to convert to global indices.
7467  Teuchos::ArrayView<const LO> lidsIn;
7468  this->getLocalRowView (lclRow, lidsIn, valsIn);
7469  const map_type& colMap = * (this->getColMap ());
7470  for (size_t k = 0; k < numEnt; ++k) {
7471  gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
7472  }
7473  }
7474  else if (this->isGloballyIndexed ()) {
7475  // If the matrix is globally indexed on the calling process,
7476  // then we can use the column indices directly. However, we
7477  // have to get the global row index. The calling process must
7478  // have a row Map, since otherwise it shouldn't be participating
7479  // in packing operations.
7480  Teuchos::ArrayView<const GO> gblIndView;;
7481  const map_type& rowMap = * (this->getRowMap ());
7482  const GO gblRow = rowMap.getGlobalElement (lclRow);
7483  this->getGlobalRowView (gblRow, gblIndView, valsIn);
7484  for (size_t k = 0; k < numEnt; ++k) {
7485  gidsIn_k[k] = gblIndView[k];
7486  }
7487  }
7488  // mfh 11 Sep 2017: Currently, if the matrix is neither globally
7489  // nor locally indexed, then it has no entries. Therefore,
7490  // there is nothing to pack. No worries!
7491 
7492  typename HES::device_type outputDevice;
7493  auto valsIn_k =
7495  reinterpret_cast<const ST*> (valsIn.getRawPtr ()),
7496  valsIn.size (),
7497  true, "valsIn");
7498  const size_t numBytesPerValue =
7499  PackTraits<ST,HES>::packValueCount (valsIn[0]);
7500  const size_t numBytes =
7501  this->packRow (exports_h.data (), offset, numEnt, gidsIn_k.data (),
7502  valsIn_k.data (), numBytesPerValue);
7503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7504  (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
7505  "First invalid offset into 'exports' pack buffer at index i = " << i
7506  << ". exportLIDs_h[i]: " << exportLIDs_h[i] << ", bufSize: " <<
7507  bufSize << ", offset: " << offset << ", numBytes: " << numBytes <<
7508  ".");
7509  // numPacketsPerLID_h[i] is the number of "packets" in the
7510  // current local row i. Packet=char (really "byte") so use the
7511  // number of bytes of the packed data for that row.
7512  numPacketsPerLID_h[i] = numBytes;
7513  offset += numBytes;
7514  }
7515 
7516  if (verbose) {
7517  std::ostringstream os;
7518  os << *prefix << "Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
7519  << *prefix << " "
7520  << dualViewStatusToString (exports, "exports")
7521  << endl
7522  << *prefix << " "
7523  << dualViewStatusToString (exportLIDs, "exportLIDs")
7524  << endl;
7525  std::cerr << os.str ();
7526  }
7527  }
7528 
7529  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7530  LocalOrdinal
7531  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7532  combineGlobalValuesRaw (const LocalOrdinal lclRow,
7533  const LocalOrdinal numEnt,
7534  const impl_scalar_type vals[],
7535  const GlobalOrdinal cols[],
7536  const Tpetra::CombineMode combineMode)
7537  {
7538  typedef GlobalOrdinal GO;
7539  //const char tfecfFuncName[] = "combineGlobalValuesRaw: ";
7540 
7541  // mfh 23 Mar 2017: This branch is not thread safe in a debug
7542  // build, due to use of Teuchos::ArrayView; see #229.
7543  const GO gblRow = this->myGraph_->rowMap_->getGlobalElement (lclRow);
7544  Teuchos::ArrayView<const GO> cols_av (numEnt == 0 ? NULL : cols, numEnt);
7545  Teuchos::ArrayView<const Scalar> vals_av (numEnt == 0 ? NULL : reinterpret_cast<const Scalar*> (vals), numEnt);
7546 
7547  // FIXME (mfh 23 Mar 2017) This is a work-around for less common
7548  // combine modes. combineGlobalValues throws on error; it does
7549  // not return an error code. Thus, if it returns, it succeeded.
7550  this->combineGlobalValues (gblRow, cols_av, vals_av, combineMode);
7551  return numEnt;
7552  }
7553 
7554  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7555  void
7556  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7557  combineGlobalValues (const GlobalOrdinal globalRowIndex,
7558  const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
7559  const Teuchos::ArrayView<const Scalar>& values,
7560  const Tpetra::CombineMode combineMode)
7561  {
7562  const char tfecfFuncName[] = "combineGlobalValues: ";
7563 
7564  if (isStaticGraph ()) {
7565  // INSERT doesn't make sense for a static graph, since you
7566  // aren't allowed to change the structure of the graph.
7567  // However, all the other combine modes work.
7568  if (combineMode == ADD) {
7569  sumIntoGlobalValues (globalRowIndex, columnIndices, values);
7570  }
7571  else if (combineMode == REPLACE) {
7572  replaceGlobalValues (globalRowIndex, columnIndices, values);
7573  }
7574  else if (combineMode == ABSMAX) {
7575  using ::Tpetra::Details::AbsMax;
7576  AbsMax<Scalar> f;
7577  this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
7578  columnIndices,
7579  values, f);
7580  }
7581  else if (combineMode == INSERT) {
7582  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7583  isStaticGraph () && combineMode == INSERT, std::invalid_argument,
7584  "INSERT combine mode is not allowed if the matrix has a static graph "
7585  "(i.e., was constructed with the CrsMatrix constructor that takes a "
7586  "const CrsGraph pointer).");
7587  }
7588  else {
7589  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7590  true, std::logic_error, "Invalid combine mode; should never get "
7591  "here! Please report this bug to the Tpetra developers.");
7592  }
7593  }
7594  else { // The matrix has a dynamic graph.
7595  if (combineMode == ADD || combineMode == INSERT) {
7596  // For a dynamic graph, all incoming column indices are
7597  // inserted into the target graph. Duplicate indices will
7598  // have their values summed. In this context, ADD and INSERT
7599  // are equivalent. We need to call insertGlobalValues()
7600  // anyway if the column indices don't yet exist in this row,
7601  // so we just call insertGlobalValues() for both cases.
7602  try {
7603  this->insertGlobalValuesFiltered (globalRowIndex, columnIndices,
7604  values);
7605  }
7606  catch (std::exception& e) {
7607  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7608  (true, std::runtime_error, std::endl
7609  << "insertGlobalValuesFiltered(" << globalRowIndex << ", "
7610  << std::endl << Teuchos::toString (columnIndices) << ", "
7611  << std::endl << Teuchos::toString (values)
7612  << ") threw an exception: " << std::endl << e.what ());
7613  }
7614  }
7615  // FIXME (mfh 14 Mar 2012):
7616  //
7617  // Implementing ABSMAX or REPLACE for a dynamic graph would
7618  // require modifying assembly to attach a possibly different
7619  // combine mode to each inserted (i, j, A_ij) entry. For
7620  // example, consider two different Export operations to the same
7621  // target CrsMatrix, the first with ABSMAX combine mode and the
7622  // second with REPLACE. This isn't a common use case, so we
7623  // won't mess with it for now.
7624  else if (combineMode == ABSMAX) {
7625  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7626  ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
7627  "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
7628  "implemented.");
7629  }
7630  else if (combineMode == REPLACE) {
7631  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7632  ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
7633  "REPLACE combine mode when the matrix has a dynamic graph is not yet "
7634  "implemented.");
7635  }
7636  else {
7637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7638  true, std::logic_error, "Should never get here! Please report this "
7639  "bug to the Tpetra developers.");
7640  }
7641  }
7642  }
7643 
7644  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7645  void
7646  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7647 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
7648  unpackAndCombineNew
7649 #else // TPETRA_ENABLE_DEPRECATED_CODE
7650  unpackAndCombine
7651 #endif // TPETRA_ENABLE_DEPRECATED_CODE
7652  (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
7653  Kokkos::DualView<char*, buffer_device_type> imports,
7654  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7655  const size_t constantNumPackets,
7656  Distributor& distor,
7657  const CombineMode combineMode)
7658  {
7661  using std::endl;
7662  const char tfecfFuncName[] = "unpackAndCombine: ";
7663  ProfilingRegion regionUAC ("Tpetra::CrsMatrix::unpackAndCombine");
7664 
7665  const bool debug = ::Tpetra::Details::Behavior::debug ();
7666  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
7667  constexpr int numValidModes = 5;
7668  const CombineMode validModes[numValidModes] =
7669  {ADD, REPLACE, ABSMAX, INSERT, ZERO};
7670  const char* validModeNames[numValidModes] =
7671  {"ADD", "REPLACE", "ABSMAX", "INSERT", "ZERO"};
7672 
7673  std::unique_ptr<std::string> prefix;
7674  int myRank = 0;
7675  if (verbose) {
7676  auto map = this->getMap ();
7677  if (! map.is_null ()) {
7678  auto comm = map->getComm ();
7679  if (! comm.is_null ()) {
7680  myRank = comm->getRank ();
7681  }
7682  }
7683  prefix = [myRank] () {
7684  std::ostringstream pfxStrm;
7685  pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::unpackAndCombine: ";
7686  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7687  } ();
7688  std::ostringstream os;
7689  os << *prefix << "Start:" << endl
7690  << *prefix << " "
7691  << dualViewStatusToString (importLIDs, "importLIDs")
7692  << endl
7693  << *prefix << " "
7694  << dualViewStatusToString (imports, "imports")
7695  << endl
7696  << *prefix << " "
7697  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7698  << endl
7699  << *prefix << " constantNumPackets: " << constantNumPackets
7700  << endl
7701  << *prefix << " combineMode: " << combineModeToString (combineMode)
7702  << endl;
7703  std::cerr << os.str ();
7704  }
7705 
7706  if (debug) {
7707  if (std::find (validModes, validModes+numValidModes, combineMode) ==
7708  validModes+numValidModes) {
7709  std::ostringstream os;
7710  os << "Invalid combine mode. Valid modes are {";
7711  for (int k = 0; k < numValidModes; ++k) {
7712  os << validModeNames[k];
7713  if (k < numValidModes - 1) {
7714  os << ", ";
7715  }
7716  }
7717  os << "}.";
7718  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7719  (true, std::invalid_argument, os.str ());
7720  }
7721  }
7722 
7723  if (combineMode == ZERO) {
7724  return; // nothing to do
7725  }
7726 
7727  if (debug) {
7728  using Teuchos::reduceAll;
7729  std::unique_ptr<std::ostringstream> msg (new std::ostringstream ());
7730  int lclBad = 0;
7731  try {
7732  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
7733  constantNumPackets, distor, combineMode);
7734  } catch (std::exception& e) {
7735  lclBad = 1;
7736  *msg << e.what ();
7737  }
7738  int gblBad = 0;
7739  const Teuchos::Comm<int>& comm = * (this->getComm ());
7740  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
7741  lclBad, Teuchos::outArg (gblBad));
7742  if (gblBad != 0) {
7743  // mfh 22 Oct 2017: 'prefix' might be null, since it is only
7744  // initialized in a debug build. Thus, we get the process
7745  // rank again here. This is an error message, so the small
7746  // run-time cost doesn't matter. See #1887.
7747  std::ostringstream os;
7748  os << "(Proc " << comm.getRank () << ") " << msg->str () << endl;
7749  msg = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
7750  ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
7751  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7752  (true, std::logic_error, std::endl << "unpackAndCombineImpl "
7753  "threw an exception on one or more participating processes: "
7754  << endl << msg->str ());
7755  }
7756  }
7757  else {
7758  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
7759  constantNumPackets, distor, combineMode);
7760  }
7761 
7762  if (verbose) {
7763  std::ostringstream os;
7764  os << *prefix << "Done!" << endl
7765  << *prefix << " "
7766  << dualViewStatusToString (importLIDs, "importLIDs")
7767  << endl
7768  << *prefix << " "
7769  << dualViewStatusToString (imports, "imports")
7770  << endl
7771  << *prefix << " "
7772  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7773  << endl;
7774  std::cerr << os.str ();
7775  }
7776  }
7777 
7778  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7779  void
7780  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7781  unpackAndCombineImpl (const Kokkos::DualView<const local_ordinal_type*,
7782  buffer_device_type>& importLIDs,
7783  const Kokkos::DualView<const char*,
7784  buffer_device_type>& imports,
7785  const Kokkos::DualView<const size_t*,
7786  buffer_device_type>& numPacketsPerLID,
7787  const size_t constantNumPackets,
7788  Distributor & distor,
7789  const CombineMode combineMode,
7790  const bool atomic)
7791  {
7792  // Exception are caught and handled upstream, so we just call the
7793  // implementations directly.
7794  if (this->isStaticGraph ()) {
7795  using ::Tpetra::Details::unpackCrsMatrixAndCombineNew;
7796  unpackCrsMatrixAndCombineNew (*this, imports, numPacketsPerLID,
7797  importLIDs, constantNumPackets,
7798  distor, combineMode, atomic);
7799  }
7800  else {
7801  this->unpackAndCombineImplNonStatic (importLIDs, imports,
7802  numPacketsPerLID,
7803  constantNumPackets,
7804  distor, combineMode);
7805  }
7806  }
7807 
7808  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7809  void
7810  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7811  unpackAndCombineImplNonStatic (const Kokkos::DualView<const local_ordinal_type*,
7812  buffer_device_type>& importLIDs,
7813  const Kokkos::DualView<const char*,
7814  buffer_device_type>& imports,
7815  const Kokkos::DualView<const size_t*,
7816  buffer_device_type>& numPacketsPerLID,
7817  const size_t /* constantNumPackets */,
7818  Distributor& /* distor */,
7819  const CombineMode combineMode)
7820  {
7821  using Kokkos::View;
7822  using Kokkos::subview;
7823  using Kokkos::MemoryUnmanaged;
7827  using std::endl;
7828  typedef LocalOrdinal LO;
7829  typedef GlobalOrdinal GO;
7830  typedef impl_scalar_type ST;
7831  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
7832  typedef typename View<int*, device_type>::HostMirror::execution_space HES;
7833  typedef std::pair<typename View<int*, HES>::size_type,
7834  typename View<int*, HES>::size_type> pair_type;
7835  typedef View<GO*, HES, MemoryUnmanaged> gids_out_type;
7836  typedef View<ST*, HES, MemoryUnmanaged> vals_out_type;
7837  const char tfecfFuncName[] = "unpackAndCombineImplNonStatic: ";
7838 
7839  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
7840  // output to std::cerr on every MPI process. This is unwise for
7841  // runs with large numbers of MPI processes.
7842  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
7843  std::unique_ptr<std::string> prefix;
7844  if (verbose) {
7845  int myRank = 0;
7846  auto map = this->getMap ();
7847  if (! map.is_null ()) {
7848  auto comm = map->getComm ();
7849  if (! comm.is_null ()) {
7850  myRank = comm->getRank ();
7851  }
7852  }
7853  // Restrict pfxStrm to inner scope to reduce high-water memory usage.
7854  prefix = [myRank] () {
7855  std::ostringstream pfxStrm;
7856  pfxStrm << "Proc " << myRank << ": Tpetra::CrsMatrix::"
7857  "unpackAndCombineImplNonStatic: ";
7858  return std::unique_ptr<std::string> (new std::string (pfxStrm.str ()));
7859  } ();
7860 
7861  std::ostringstream os;
7862  os << *prefix << endl; // we've already printed DualViews' statuses
7863  std::cerr << os.str ();
7864  }
7865 
7866  const size_type numImportLIDs = importLIDs.extent (0);
7867  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7868  (numImportLIDs != static_cast<size_type> (numPacketsPerLID.extent (0)),
7869  std::invalid_argument, "importLIDs.size() = " << numImportLIDs
7870  << " != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
7871  << ".");
7872  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7873  (combineMode != ADD && combineMode != INSERT && combineMode != REPLACE &&
7874  combineMode != ABSMAX && combineMode != ZERO, std::invalid_argument,
7875  "Invalid CombineMode value " << combineMode << ". Valid "
7876  << "values include ADD, INSERT, REPLACE, ABSMAX, and ZERO.");
7877  if (combineMode == ZERO || numImportLIDs == 0) {
7878  return; // nothing to do; no need to combine entries
7879  }
7880 
7881  // We're unpacking on host. This is read-only host access of imports.
7882  {
7883  auto imports_nc = castAwayConstDualView (imports);
7884  imports_nc.sync_host ();
7885  }
7886  auto imports_h = imports.view_host ();
7887 
7888  // Read-only host access.
7889  {
7890  auto numPacketsPerLID_nc = castAwayConstDualView (numPacketsPerLID);
7891  numPacketsPerLID_nc.sync_host ();
7892  }
7893  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
7894 
7895  TEUCHOS_ASSERT( ! importLIDs.need_sync_host () );
7896  auto importLIDs_h = importLIDs.view_host ();
7897 
7898  size_t numBytesPerValue;
7899  {
7900  // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
7901  // with run-time size? We already assume that all entries in both the
7902  // source and target matrices have the same size. If the calling process
7903  // owns at least one entry in either matrix, we can use that entry to set
7904  // the size. However, it is possible that the calling process owns no
7905  // entries. In that case, we're in trouble. One way to fix this would be
7906  // for each row's data to contain the run-time size. This is only
7907  // necessary if the size is not a compile-time constant.
7908  Scalar val;
7909  numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
7910  }
7911 
7912  // Determine the maximum number of entries in any one row
7913  size_t offset = 0;
7914  size_t maxRowNumEnt = 0;
7915  for (size_type i = 0; i < numImportLIDs; ++i) {
7916  const size_t numBytes = numPacketsPerLID_h[i];
7917  if (numBytes == 0) {
7918  continue; // empty buffer for that row means that the row is empty
7919  }
7920  // We need to unpack a nonzero number of entries for this row.
7921 #ifdef HAVE_TPETRA_DEBUG
7922  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7923  (offset + numBytes > static_cast<size_t> (imports_h.extent (0)),
7924  std::logic_error, "At local row index importLIDs_h[i=" << i << "]="
7925  << importLIDs_h[i] << ", offset (=" << offset << ") + numBytes (="
7926  << numBytes << ") > imports_h.extent(0)="
7927  << imports_h.extent (0) << ".");
7928 #endif // HAVE_TPETRA_DEBUG
7929 
7930  LO numEntLO = 0;
7931 
7932 #ifdef HAVE_TPETRA_DEBUG
7933  const size_t theNumBytes = PackTraits<LO, HES>::packValueCount (numEntLO);
7934  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7935  (theNumBytes > numBytes, std::logic_error, "theNumBytes = "
7936  << theNumBytes << " > numBytes = " << numBytes << ".");
7937 #endif // HAVE_TPETRA_DEBUG
7938 
7939  const char* const inBuf = imports_h.data () + offset;
7940  const size_t actualNumBytes =
7941  PackTraits<LO, HES>::unpackValue (numEntLO, inBuf);
7942 
7943 #ifdef HAVE_TPETRA_DEBUG
7944  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7945  (actualNumBytes > numBytes, std::logic_error, "At i = " << i
7946  << ", actualNumBytes=" << actualNumBytes
7947  << " > numBytes=" << numBytes << ".");
7948  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7949  (numEntLO == 0, std::logic_error, "At local row index importLIDs_h[i="
7950  << i << "]=" << importLIDs_h[i] << ", the number of entries read "
7951  "from the packed data is numEntLO=" << numEntLO << ", but numBytes="
7952  << numBytes << " != 0.");
7953 #else
7954  (void) actualNumBytes;
7955 #endif // HAVE_TPETRA_DEBUG
7956 
7957  maxRowNumEnt = std::max (static_cast<size_t> (numEntLO), maxRowNumEnt);
7958  offset += numBytes;
7959  }
7960 
7961  // Temporary space to cache incoming global column indices and
7962  // values. Column indices come in as global indices, in case the
7963  // source object's column Map differs from the target object's
7964  // (this's) column Map.
7965  View<GO*, HES> gblColInds;
7966  View<LO*, HES> lclColInds;
7967  View<ST*, HES> vals;
7968  {
7969  GO gid = 0;
7970  LO lid = 0;
7971  // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
7972  // with run-time size? We already assume that all entries in both the
7973  // source and target matrices have the same size. If the calling process
7974  // owns at least one entry in either matrix, we can use that entry to set
7975  // the size. However, it is possible that the calling process owns no
7976  // entries. In that case, we're in trouble. One way to fix this would be
7977  // for each row's data to contain the run-time size. This is only
7978  // necessary if the size is not a compile-time constant.
7979  Scalar val;
7980  gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowNumEnt, "gids");
7981  lclColInds = PackTraits<LO, HES>::allocateArray (lid, maxRowNumEnt, "lids");
7982  vals = PackTraits<ST, HES>::allocateArray (val, maxRowNumEnt, "vals");
7983  }
7984 
7985  offset = 0;
7986  for (size_type i = 0; i < numImportLIDs; ++i) {
7987  const size_t numBytes = numPacketsPerLID_h[i];
7988  if (numBytes == 0) {
7989  continue; // empty buffer for that row means that the row is empty
7990  }
7991  LO numEntLO = 0;
7992  const char* const inBuf = imports_h.data () + offset;
7993  const size_t actualNumBytes = PackTraits<LO, HES>::unpackValue (numEntLO, inBuf);
7994  (void) actualNumBytes;
7995 
7996  const size_t numEnt = static_cast<size_t>(numEntLO);;
7997  const LO lclRow = importLIDs_h[i];
7998 
7999  gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
8000  vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
8001 
8002  const size_t numBytesOut =
8003  unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
8004  offset, numBytes, numEnt, numBytesPerValue);
8005  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
8006  (numBytes != numBytesOut, std::logic_error, "At i = " << i << ", "
8007  << "numBytes = " << numBytes << " != numBytesOut = " << numBytesOut
8008  << ".");
8009 
8010  const ST* const valsRaw = const_cast<const ST*> (valsOut.data ());
8011  const GO* const gidsRaw = const_cast<const GO*> (gidsOut.data ());
8012  this->combineGlobalValuesRaw (lclRow, numEnt, valsRaw, gidsRaw, combineMode);
8013 
8014  // Don't update offset until current LID has succeeded.
8015  offset += numBytes;
8016  } // for each import LID i
8017  }
8018 
8019  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8020  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
8022  getColumnMapMultiVector (const MV& X_domainMap,
8023  const bool force) const
8024  {
8025  using Teuchos::null;
8026  using Teuchos::RCP;
8027  using Teuchos::rcp;
8028 
8029  TEUCHOS_TEST_FOR_EXCEPTION(
8030  ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
8031  "MapMultiVector: You may only call this method if the matrix has a "
8032  "column Map. If the matrix does not yet have a column Map, you should "
8033  "first call fillComplete (with domain and range Map if necessary).");
8034 
8035  // If the graph is not fill complete, then the Import object (if
8036  // one should exist) hasn't been constructed yet.
8037  TEUCHOS_TEST_FOR_EXCEPTION(
8038  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
8039  "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
8040  "this matrix's graph is fill complete.");
8041 
8042  const size_t numVecs = X_domainMap.getNumVectors ();
8043  RCP<const import_type> importer = this->getGraph ()->getImporter ();
8044  RCP<const map_type> colMap = this->getColMap ();
8045 
8046  RCP<MV> X_colMap; // null by default
8047 
8048  // If the Import object is trivial (null), then we don't need a
8049  // separate column Map multivector. Just return null in that
8050  // case. The caller is responsible for knowing not to use the
8051  // returned null pointer.
8052  //
8053  // If the Import is nontrivial, then we do need a separate
8054  // column Map multivector for the Import operation. Check in
8055  // that case if we have to (re)create the column Map
8056  // multivector.
8057  if (! importer.is_null () || force) {
8058  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
8059  X_colMap = rcp (new MV (colMap, numVecs));
8060 
8061  // Cache the newly created multivector for later reuse.
8062  importMV_ = X_colMap;
8063  }
8064  else { // Yay, we can reuse the cached multivector!
8065  X_colMap = importMV_;
8066  // mfh 09 Jan 2013: We don't have to fill with zeros first,
8067  // because the Import uses INSERT combine mode, which overwrites
8068  // existing entries.
8069  //
8070  //X_colMap->putScalar (ZERO);
8071  }
8072  }
8073  return X_colMap;
8074  }
8075 
8076  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8077  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
8080  const bool force) const
8081  {
8082  using Teuchos::null;
8083  using Teuchos::RCP;
8084  using Teuchos::rcp;
8085 
8086  // If the graph is not fill complete, then the Export object (if
8087  // one should exist) hasn't been constructed yet.
8088  TEUCHOS_TEST_FOR_EXCEPTION(
8089  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
8090  "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
8091  "matrix's graph is fill complete.");
8092 
8093  const size_t numVecs = Y_rangeMap.getNumVectors ();
8094  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
8095  // Every version of the constructor takes either a row Map, or a
8096  // graph (all of whose constructors take a row Map). Thus, the
8097  // matrix always has a row Map.
8098  RCP<const map_type> rowMap = this->getRowMap ();
8099 
8100  RCP<MV> Y_rowMap; // null by default
8101 
8102  // If the Export object is trivial (null), then we don't need a
8103  // separate row Map multivector. Just return null in that case.
8104  // The caller is responsible for knowing not to use the returned
8105  // null pointer.
8106  //
8107  // If the Export is nontrivial, then we do need a separate row
8108  // Map multivector for the Export operation. Check in that case
8109  // if we have to (re)create the row Map multivector.
8110  if (! exporter.is_null () || force) {
8111  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
8112  Y_rowMap = rcp (new MV (rowMap, numVecs));
8113  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
8114  }
8115  else { // Yay, we can reuse the cached multivector!
8116  Y_rowMap = exportMV_;
8117  }
8118  }
8119  return Y_rowMap;
8120  }
8121 
8122  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8123  void
8125  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
8126  {
8127  TEUCHOS_TEST_FOR_EXCEPTION(
8128  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
8129  "removeEmptyProcessesInPlace: This method does not work when the matrix "
8130  "was created with a constant graph (that is, when it was created using "
8131  "the version of its constructor that takes an RCP<const CrsGraph>). "
8132  "This is because the matrix is not allowed to modify the graph in that "
8133  "case, but removing empty processes requires modifying the graph.");
8134  myGraph_->removeEmptyProcessesInPlace (newMap);
8135  // Even though CrsMatrix's row Map (as returned by getRowMap())
8136  // comes from its CrsGraph, CrsMatrix still implements DistObject,
8137  // so we also have to change the DistObject's Map.
8138  this->map_ = this->getRowMap ();
8139  // In the nonconst graph case, staticGraph_ is just a const
8140  // pointer to myGraph_. This assignment is probably redundant,
8141  // but it doesn't hurt.
8142  staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
8143  }
8144 
8145  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8146  Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
8148  add (const Scalar& alpha,
8150  const Scalar& beta,
8151  const Teuchos::RCP<const map_type>& domainMap,
8152  const Teuchos::RCP<const map_type>& rangeMap,
8153  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8154  {
8155  using Teuchos::Array;
8156  using Teuchos::ArrayView;
8157  using Teuchos::ParameterList;
8158  using Teuchos::RCP;
8159  using Teuchos::rcp;
8160  using Teuchos::rcp_implicit_cast;
8161  using Teuchos::sublist;
8162  typedef LocalOrdinal LO;
8163  typedef GlobalOrdinal GO;
8166 
8167  const crs_matrix_type& B = *this; // a convenient abbreviation
8168  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
8169  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
8170 
8171  // If the user didn't supply a domain or range Map, then try to
8172  // get one from B first (if it has them), then from A (if it has
8173  // them). If we don't have any domain or range Maps, scold the
8174  // user.
8175  RCP<const map_type> A_domainMap = A.getDomainMap ();
8176  RCP<const map_type> A_rangeMap = A.getRangeMap ();
8177  RCP<const map_type> B_domainMap = B.getDomainMap ();
8178  RCP<const map_type> B_rangeMap = B.getRangeMap ();
8179 
8180  RCP<const map_type> theDomainMap = domainMap;
8181  RCP<const map_type> theRangeMap = rangeMap;
8182 
8183  if (domainMap.is_null ()) {
8184  if (B_domainMap.is_null ()) {
8185  TEUCHOS_TEST_FOR_EXCEPTION(
8186  A_domainMap.is_null (), std::invalid_argument,
8187  "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
8188  "then you must supply a nonnull domain Map to this method.");
8189  theDomainMap = A_domainMap;
8190  } else {
8191  theDomainMap = B_domainMap;
8192  }
8193  }
8194  if (rangeMap.is_null ()) {
8195  if (B_rangeMap.is_null ()) {
8196  TEUCHOS_TEST_FOR_EXCEPTION(
8197  A_rangeMap.is_null (), std::invalid_argument,
8198  "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
8199  "then you must supply a nonnull range Map to this method.");
8200  theRangeMap = A_rangeMap;
8201  } else {
8202  theRangeMap = B_rangeMap;
8203  }
8204  }
8205 
8206 #ifdef HAVE_TPETRA_DEBUG
8207  // In a debug build, check that A and B have matching domain and
8208  // range Maps, if they have domain and range Maps at all. (If
8209  // they aren't fill complete, then they may not yet have them.)
8210  if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) {
8211  if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
8212  TEUCHOS_TEST_FOR_EXCEPTION(
8213  ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument,
8214  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a domain Map "
8215  "which is the same as (isSameAs) this RowMatrix's domain Map.");
8216  TEUCHOS_TEST_FOR_EXCEPTION(
8217  ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument,
8218  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a range Map "
8219  "which is the same as (isSameAs) this RowMatrix's range Map.");
8220  TEUCHOS_TEST_FOR_EXCEPTION(
8221  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
8222  std::invalid_argument,
8223  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
8224  "(isSameAs) this RowMatrix's domain Map.");
8225  TEUCHOS_TEST_FOR_EXCEPTION(
8226  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
8227  std::invalid_argument,
8228  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
8229  "(isSameAs) this RowMatrix's range Map.");
8230  }
8231  }
8232  else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
8233  TEUCHOS_TEST_FOR_EXCEPTION(
8234  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
8235  std::invalid_argument,
8236  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
8237  "(isSameAs) this RowMatrix's domain Map.");
8238  TEUCHOS_TEST_FOR_EXCEPTION(
8239  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
8240  std::invalid_argument,
8241  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
8242  "(isSameAs) this RowMatrix's range Map.");
8243  }
8244  else {
8245  TEUCHOS_TEST_FOR_EXCEPTION(
8246  domainMap.is_null () || rangeMap.is_null (), std::invalid_argument,
8247  "Tpetra::CrsMatrix::add: If neither A nor B have a domain and range "
8248  "Map, then you must supply a nonnull domain and range Map to this "
8249  "method.");
8250  }
8251 #endif // HAVE_TPETRA_DEBUG
8252 
8253  // What parameters do we pass to C's constructor? Do we call
8254  // fillComplete on C after filling it? And if so, what parameters
8255  // do we pass to C's fillComplete call?
8256  bool callFillComplete = true;
8257  RCP<ParameterList> constructorSublist;
8258  RCP<ParameterList> fillCompleteSublist;
8259  if (! params.is_null ()) {
8260  callFillComplete = params->get ("Call fillComplete", callFillComplete);
8261  constructorSublist = sublist (params, "Constructor parameters");
8262  fillCompleteSublist = sublist (params, "fillComplete parameters");
8263  }
8264 
8265  RCP<const map_type> A_rowMap = A.getRowMap ();
8266  RCP<const map_type> B_rowMap = B.getRowMap ();
8267  RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
8268  RCP<crs_matrix_type> C; // The result matrix.
8269 
8270  // If A and B's row Maps are the same, we can compute an upper
8271  // bound on the number of entries in each row of C, before
8272  // actually computing the sum. A reasonable upper bound is the
8273  // sum of the two entry counts in each row. If we choose this as
8274  // the actual per-row upper bound, we can use static profile.
8275  if (A_rowMap->isSameAs (*B_rowMap)) {
8276  const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8277  Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
8278 
8279  // Get the number of entries in each row of A.
8280  if (alpha != ZERO) {
8281  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8282  const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8283  C_maxNumEntriesPerRow[localRow] += A_numEntries;
8284  }
8285  }
8286  // Get the number of entries in each row of B.
8287  if (beta != ZERO) {
8288  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8289  const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8290  C_maxNumEntriesPerRow[localRow] += B_numEntries;
8291  }
8292  }
8293  // Construct the result matrix C.
8294  if (constructorSublist.is_null ()) {
8295  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
8296  StaticProfile));
8297  } else {
8298  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
8299  StaticProfile, constructorSublist));
8300  }
8301  // Since A and B have the same row Maps, we could add them
8302  // together all at once and merge values before we call
8303  // insertGlobalValues. However, we don't really need to, since
8304  // we've already allocated enough space in each row of C for C
8305  // to do the merge itself.
8306  }
8307  else { // the row Maps of A and B are not the same
8308  // Construct the result matrix C.
8309 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
8310  if (constructorSublist.is_null ()) {
8311  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile));
8312  } else {
8313  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile,
8314  constructorSublist));
8315  }
8316 #endif
8317  }
8318 
8319 #ifdef HAVE_TPETRA_DEBUG
8320  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
8321  "Tpetra::RowMatrix::add: C should not be null at this point. "
8322  "Please report this bug to the Tpetra developers.");
8323 #endif // HAVE_TPETRA_DEBUG
8324  //
8325  // Compute C = alpha*A + beta*B.
8326  //
8327  Array<GO> ind;
8328  Array<Scalar> val;
8329 
8330  if (alpha != ZERO) {
8331  const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8332  for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
8333  size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8334  const GO globalRow = A_rowMap->getGlobalElement (localRow);
8335  if (A_numEntries > static_cast<size_t> (ind.size ())) {
8336  ind.resize (A_numEntries);
8337  val.resize (A_numEntries);
8338  }
8339  ArrayView<GO> indView = ind (0, A_numEntries);
8340  ArrayView<Scalar> valView = val (0, A_numEntries);
8341  A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
8342 
8343  if (alpha != ONE) {
8344  for (size_t k = 0; k < A_numEntries; ++k) {
8345  valView[k] *= alpha;
8346  }
8347  }
8348  C->insertGlobalValues (globalRow, indView, valView);
8349  }
8350  }
8351 
8352  if (beta != ZERO) {
8353  const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
8354  for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
8355  size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8356  const GO globalRow = B_rowMap->getGlobalElement (localRow);
8357  if (B_numEntries > static_cast<size_t> (ind.size ())) {
8358  ind.resize (B_numEntries);
8359  val.resize (B_numEntries);
8360  }
8361  ArrayView<GO> indView = ind (0, B_numEntries);
8362  ArrayView<Scalar> valView = val (0, B_numEntries);
8363  B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
8364 
8365  if (beta != ONE) {
8366  for (size_t k = 0; k < B_numEntries; ++k) {
8367  valView[k] *= beta;
8368  }
8369  }
8370  C->insertGlobalValues (globalRow, indView, valView);
8371  }
8372  }
8373 
8374  if (callFillComplete) {
8375  if (fillCompleteSublist.is_null ()) {
8376  C->fillComplete (theDomainMap, theRangeMap);
8377  } else {
8378  C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
8379  }
8380  }
8381  return rcp_implicit_cast<row_matrix_type> (C);
8382  }
8383 
8384 
8385 
8386  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8387  void
8390  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
8391  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
8392  const Teuchos::RCP<const map_type>& domainMap,
8393  const Teuchos::RCP<const map_type>& rangeMap,
8394  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8395  {
8400  using Teuchos::ArrayRCP;
8401  using Teuchos::ArrayView;
8402  using Teuchos::Comm;
8403  using Teuchos::ParameterList;
8404  using Teuchos::RCP;
8405  typedef LocalOrdinal LO;
8406  typedef GlobalOrdinal GO;
8407  typedef node_type NT;
8408  typedef CrsMatrix<Scalar, LO, GO, NT> this_type;
8409  typedef Vector<int, LO, GO, NT> IntVectorType;
8410  using Teuchos::as;
8411 
8412  const bool debug = ::Tpetra::Details::Behavior::debug ();
8413  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
8414  int MyPID = getComm ()->getRank ();
8415 
8416  std::unique_ptr<std::string> verbosePrefix;
8417  if (verbose) {
8418  std::ostringstream os;
8419  os << "Proc " << MyPID << ": transferAndFillComplete: ";
8420  verbosePrefix = std::unique_ptr<std::string> (new std::string (os.str ()));
8421 
8422  os << "start" << std::endl;
8423  std::cerr << os.str ();
8424  }
8425 
8426  //
8427  // Get the caller's parameters
8428  //
8429  bool isMM = false; // optimize for matrix-matrix ops.
8430  bool reverseMode = false; // Are we in reverse mode?
8431  bool restrictComm = false; // Do we need to restrict the communicator?
8432 
8433  int mm_optimization_core_count=::Tpetra::Details::Behavior::TAFC_OptimizationCoreCount();
8434  RCP<ParameterList> matrixparams; // parameters for the destination matrix
8435  bool overrideAllreduce = false;
8436  if (! params.is_null ()) {
8437  matrixparams = sublist (params, "CrsMatrix");
8438  reverseMode = params->get ("Reverse Mode", reverseMode);
8439  restrictComm = params->get ("Restrict Communicator", restrictComm);
8440  auto & slist = params->sublist("matrixmatrix: kernel params",false);
8441  isMM = slist.get("isMatrixMatrix_TransferAndFillComplete",false);
8442  mm_optimization_core_count = slist.get("MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
8443 
8444  overrideAllreduce = slist.get("MM_TAFC_OverrideAllreduceCheck",false);
8445  if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM = false;
8446  if(reverseMode) isMM = false;
8447  }
8448 
8449  // Only used in the sparse matrix-matrix multiply (isMM) case.
8450  std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
8451  int mismatch = 0;
8452  int reduced_mismatch = 0;
8453  if (isMM && !overrideAllreduce) {
8454 
8455  // Test for pathological matrix transfer
8456  const bool source_vals = ! getGraph ()->getImporter ().is_null();
8457  const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
8458  rowTransfer.getRemoteLIDs ().size() == 0);
8459  mismatch = (source_vals != target_vals) ? 1 : 0;
8460  iallreduceRequest =
8461  ::Tpetra::Details::iallreduce (mismatch, reduced_mismatch,
8462  Teuchos::REDUCE_MAX, * (getComm ()));
8463  }
8464 
8465 #ifdef HAVE_TPETRA_MMM_TIMINGS
8466  using Teuchos::TimeMonitor;
8467  std::string label;
8468  if(!params.is_null())
8469  label = params->get("Timer Label",label);
8470  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
8471  std::string tlstr;
8472  {
8473  std::ostringstream os;
8474  if(isMM) os<<":MMOpt";
8475  else os<<":MMLegacy";
8476  tlstr = os.str();
8477  }
8478 
8479  Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string("TAFC All") +tlstr ));
8480 #endif
8481 
8482  // Make sure that the input argument rowTransfer is either an
8483  // Import or an Export. Import and Export are the only two
8484  // subclasses of Transfer that we defined, but users might
8485  // (unwisely, for now at least) decide to implement their own
8486  // subclasses. Exclude this possibility.
8487  const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
8488  const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
8489  TEUCHOS_TEST_FOR_EXCEPTION(
8490  xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument,
8491  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
8492  "argument must be either an Import or an Export, and its template "
8493  "parameters must match the corresponding template parameters of the "
8494  "CrsMatrix.");
8495 
8496  // Make sure that the input argument domainTransfer is either an
8497  // Import or an Export. Import and Export are the only two
8498  // subclasses of Transfer that we defined, but users might
8499  // (unwisely, for now at least) decide to implement their own
8500  // subclasses. Exclude this possibility.
8501  Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
8502  Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
8503 
8504  if(! domainTransfer.is_null()) {
8505  TEUCHOS_TEST_FOR_EXCEPTION(
8506  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
8507  "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
8508  "argument must be either an Import or an Export, and its template "
8509  "parameters must match the corresponding template parameters of the "
8510  "CrsMatrix.");
8511 
8512  TEUCHOS_TEST_FOR_EXCEPTION(
8513  ( xferAsImport != NULL || ! xferDomainAsImport.is_null() ) &&
8514  (( xferAsImport != NULL && xferDomainAsImport.is_null() ) ||
8515  ( xferAsImport == NULL && ! xferDomainAsImport.is_null() )), std::invalid_argument,
8516  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8517  "arguments must be of the same type (either Import or Export).");
8518 
8519  TEUCHOS_TEST_FOR_EXCEPTION(
8520  ( xferAsExport != NULL || ! xferDomainAsExport.is_null() ) &&
8521  (( xferAsExport != NULL && xferDomainAsExport.is_null() ) ||
8522  ( xferAsExport == NULL && ! xferDomainAsExport.is_null() )), std::invalid_argument,
8523  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8524  "arguments must be of the same type (either Import or Export).");
8525  } // domainTransfer != null
8526 
8527 
8528  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
8529  // if the source Map is not distributed but the target Map is?
8530  const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
8531 
8532  // Get the new domain and range Maps. We need some of them for
8533  // error checking, now that we have the reverseMode parameter.
8534  RCP<const map_type> MyRowMap = reverseMode ?
8535  rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
8536  RCP<const map_type> MyColMap; // create this below
8537  RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
8538  domainMap : getDomainMap ();
8539  RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
8540  rangeMap : getRangeMap ();
8541  RCP<const map_type> BaseRowMap = MyRowMap;
8542  RCP<const map_type> BaseDomainMap = MyDomainMap;
8543 
8544  // If the user gave us a nonnull destMat, then check whether it's
8545  // "pristine." That means that it has no entries.
8546  //
8547  // FIXME (mfh 15 May 2014) If this is not true on all processes,
8548  // then this exception test may hang. It would be better to
8549  // forward an error flag to the next communication phase.
8550  if (! destMat.is_null ()) {
8551  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
8552  // whether a graph or matrix has no entries on the calling
8553  // process, is that it is neither locally nor globally indexed.
8554  // This may change eventually with the Kokkos refactor version
8555  // of Tpetra, so it would be better just to check the quantity
8556  // of interest directly. Note that with the Kokkos refactor
8557  // version of Tpetra, asking for the total number of entries in
8558  // a graph or matrix that is not fill complete might require
8559  // computation (kernel launch), since it is not thread scalable
8560  // to update a count every time an entry is inserted.
8561  const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
8562  ! destMat->getGraph ()->isGloballyIndexed ();
8563  TEUCHOS_TEST_FOR_EXCEPTION(
8564  ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
8565  "transferAndFillComplete: The input argument 'destMat' is only allowed "
8566  "to be nonnull, if its graph is empty (neither locally nor globally "
8567  "indexed).");
8568  // FIXME (mfh 15 May 2014) At some point, we want to change
8569  // graphs and matrices so that their DistObject Map
8570  // (this->getMap()) may differ from their row Map. This will
8571  // make redistribution for 2-D distributions more efficient. I
8572  // hesitate to change this check, because I'm not sure how much
8573  // the code here depends on getMap() and getRowMap() being the
8574  // same.
8575  TEUCHOS_TEST_FOR_EXCEPTION(
8576  ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
8577  "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
8578  "input argument 'destMat' is not the same as the (row) Map specified "
8579  "by the input argument 'rowTransfer'.");
8580  TEUCHOS_TEST_FOR_EXCEPTION(
8581  ! destMat->checkSizes (*this), std::invalid_argument,
8582  "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
8583  "destination matrix, but checkSizes() indicates that it is not a legal "
8584  "legal target for redistribution from the source matrix (*this). This "
8585  "may mean that they do not have the same dimensions.");
8586  }
8587 
8588  // If forward mode (the default), then *this's (row) Map must be
8589  // the same as the source Map of the Transfer. If reverse mode,
8590  // then *this's (row) Map must be the same as the target Map of
8591  // the Transfer.
8592  //
8593  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
8594  // and matrices so that their DistObject Map (this->getMap()) may
8595  // differ from their row Map. This will make redistribution for
8596  // 2-D distributions more efficient. I hesitate to change this
8597  // check, because I'm not sure how much the code here depends on
8598  // getMap() and getRowMap() being the same.
8599  TEUCHOS_TEST_FOR_EXCEPTION(
8600  ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
8601  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8602  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
8603  TEUCHOS_TEST_FOR_EXCEPTION(
8604  ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
8605  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8606  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
8607 
8608  // checks for domainTransfer
8609  TEUCHOS_TEST_FOR_EXCEPTION(
8610  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
8611  std::invalid_argument,
8612  "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
8613  "argument must be the same as the rebalanced domain map 'domainMap'");
8614 
8615  TEUCHOS_TEST_FOR_EXCEPTION(
8616  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
8617  std::invalid_argument,
8618  "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
8619  "argument must be the same as the rebalanced domain map 'domainMap'");
8620 
8621  // The basic algorithm here is:
8622  //
8623  // 1. Call the moral equivalent of "distor.do" to handle the import.
8624  // 2. Copy all the Imported and Copy/Permuted data into the raw
8625  // CrsMatrix / CrsGraphData pointers, still using GIDs.
8626  // 3. Call an optimized version of MakeColMap that avoids the
8627  // Directory lookups (since the importer knows who owns all the
8628  // GIDs) AND reindexes to LIDs.
8629  // 4. Call expertStaticFillComplete()
8630 
8631  // Get information from the Importer
8632  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
8633  ArrayView<const LO> ExportLIDs = reverseMode ?
8634  rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
8635  ArrayView<const LO> RemoteLIDs = reverseMode ?
8636  rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
8637  ArrayView<const LO> PermuteToLIDs = reverseMode ?
8638  rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
8639  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
8640  rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
8641  Distributor& Distor = rowTransfer.getDistributor ();
8642 
8643  // Owning PIDs
8644  Teuchos::Array<int> SourcePids;
8645  Teuchos::Array<int> TargetPids;
8646 
8647  // Temp variables for sub-communicators
8648  RCP<const map_type> ReducedRowMap, ReducedColMap,
8649  ReducedDomainMap, ReducedRangeMap;
8650  RCP<const Comm<int> > ReducedComm;
8651 
8652  // If the user gave us a null destMat, then construct the new
8653  // destination matrix. We will replace its column Map later.
8654  if (destMat.is_null ()) {
8655  destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
8656  }
8657 
8658  /***************************************************/
8659  /***** 1) First communicator restriction phase ****/
8660  /***************************************************/
8661  if (restrictComm) {
8662  ReducedRowMap = MyRowMap->removeEmptyProcesses ();
8663  ReducedComm = ReducedRowMap.is_null () ?
8664  Teuchos::null :
8665  ReducedRowMap->getComm ();
8666  destMat->removeEmptyProcessesInPlace (ReducedRowMap);
8667 
8668  ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
8669  ReducedRowMap :
8670  MyDomainMap->replaceCommWithSubset (ReducedComm);
8671  ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
8672  ReducedRowMap :
8673  MyRangeMap->replaceCommWithSubset (ReducedComm);
8674 
8675  // Reset the "my" maps
8676  MyRowMap = ReducedRowMap;
8677  MyDomainMap = ReducedDomainMap;
8678  MyRangeMap = ReducedRangeMap;
8679 
8680  // Update my PID, if we've restricted the communicator
8681  if (! ReducedComm.is_null ()) {
8682  MyPID = ReducedComm->getRank ();
8683  }
8684  else {
8685  MyPID = -2; // For debugging
8686  }
8687  }
8688  else {
8689  ReducedComm = MyRowMap->getComm ();
8690  }
8691 
8692 
8693 
8694  /***************************************************/
8695  /***** 2) From Tpetra::DistObject::doTransfer() ****/
8696  /***************************************************/
8697  // Get the owning PIDs
8698  RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8699 
8700  // Check whether the source matrix's domain map and the base domain map are the same.
8701  bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8702 
8703  if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8704  // Same domain map as source matrix
8705  //
8706  // NOTE: This won't work for restrictComm (because the Import
8707  // doesn't know the restricted PIDs), though writing an
8708  // optimized version for that case would be easy (Import an
8709  // IntVector of the new PIDs). Might want to add this later.
8710  Import_Util::getPids (*MyImporter, SourcePids, false);
8711  }
8712  else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8713  // Same domain map as source matrix (restricted communicator)
8714  // We need one import from the domain to the column map
8715  IntVectorType SourceDomain_pids(getDomainMap (),true);
8716  IntVectorType SourceCol_pids(getColMap());
8717  // SourceDomain_pids contains the restricted pids
8718  SourceDomain_pids.putScalar(MyPID);
8719 
8720  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8721  SourcePids.resize (getColMap ()->getNodeNumElements ());
8722  SourceCol_pids.get1dCopy (SourcePids ());
8723  }
8724  else if (MyImporter.is_null () && bSameDomainMap) {
8725  // Matrix has no off-process entries
8726  SourcePids.resize (getColMap ()->getNodeNumElements ());
8727  SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
8728  }
8729  else if ( ! MyImporter.is_null () &&
8730  ! domainTransfer.is_null () ) {
8731  // general implementation for rectangular matrices with
8732  // domain map different than SourceMatrix domain map.
8733  // User has to provide a DomainTransfer object. We need
8734  // two communications (import/export)
8735 
8736  // TargetDomain_pids lives on the rebalanced new domain map
8737  IntVectorType TargetDomain_pids (domainMap);
8738  TargetDomain_pids.putScalar (MyPID);
8739 
8740  // SourceDomain_pids lives on the non-rebalanced old domain map
8741  IntVectorType SourceDomain_pids (getDomainMap ());
8742 
8743  // SourceCol_pids lives on the non-rebalanced old column map
8744  IntVectorType SourceCol_pids (getColMap ());
8745 
8746  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8747  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8748  }
8749  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8750  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8751  }
8752  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8753  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8754  }
8755  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8756  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8757  }
8758  else {
8759  TEUCHOS_TEST_FOR_EXCEPTION(
8760  true, std::logic_error, "Tpetra::CrsMatrix::"
8761  "transferAndFillComplete: Should never get here! "
8762  "Please report this bug to a Tpetra developer.");
8763  }
8764  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8765  SourcePids.resize (getColMap ()->getNodeNumElements ());
8766  SourceCol_pids.get1dCopy (SourcePids ());
8767  }
8768  else if ( ! MyImporter.is_null () &&
8769  BaseDomainMap->isSameAs (*BaseRowMap) &&
8770  getDomainMap ()->isSameAs (*getRowMap ())) {
8771  // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
8772 
8773  IntVectorType TargetRow_pids (domainMap);
8774  IntVectorType SourceRow_pids (getRowMap ());
8775  IntVectorType SourceCol_pids (getColMap ());
8776 
8777  TargetRow_pids.putScalar (MyPID);
8778  if (! reverseMode && xferAsImport != NULL) {
8779  SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
8780  }
8781  else if (reverseMode && xferAsExport != NULL) {
8782  SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
8783  }
8784  else if (! reverseMode && xferAsExport != NULL) {
8785  SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
8786  }
8787  else if (reverseMode && xferAsImport != NULL) {
8788  SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
8789  }
8790  else {
8791  TEUCHOS_TEST_FOR_EXCEPTION(
8792  true, std::logic_error, "Tpetra::CrsMatrix::"
8793  "transferAndFillComplete: Should never get here! "
8794  "Please report this bug to a Tpetra developer.");
8795  }
8796 
8797  SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
8798  SourcePids.resize (getColMap ()->getNodeNumElements ());
8799  SourceCol_pids.get1dCopy (SourcePids ());
8800  }
8801  else {
8802  TEUCHOS_TEST_FOR_EXCEPTION(
8803  true, std::invalid_argument, "Tpetra::CrsMatrix::"
8804  "transferAndFillComplete: This method only allows either domainMap == "
8805  "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8806  "getDomainMap () == getRowMap ()).");
8807  }
8808 
8809  // Tpetra-specific stuff
8810  size_t constantNumPackets = destMat->constantNumberOfPackets ();
8811  if (constantNumPackets == 0) {
8812  destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8813  RemoteLIDs.size ());
8814  }
8815  else {
8816  // There are a constant number of packets per element. We
8817  // already know (from the number of "remote" (incoming)
8818  // elements) how many incoming elements we expect, so we can
8819  // resize the buffer accordingly.
8820  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
8821  destMat->reallocImportsIfNeeded (rbufLen, false, nullptr);
8822  }
8823 
8824  // Pack & Prepare w/ owning PIDs
8825  if (debug) {
8826  using Teuchos::outArg;
8827  using Teuchos::REDUCE_MAX;
8828  using Teuchos::reduceAll;
8829  using std::cerr;
8830  using std::endl;
8831  RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8832  const int myRank = comm->getRank ();
8833 
8834  std::ostringstream errStrm;
8835  int lclErr = 0;
8836  int gblErr = 0;
8837 
8838  Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8839  try {
8840  // packAndPrepare* methods modify numExportPacketsPerLID_.
8841  destMat->numExportPacketsPerLID_.modify_host ();
8842  numExportPacketsPerLID =
8843  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8844  }
8845  catch (std::exception& e) {
8846  errStrm << "Proc " << myRank << ": getArrayViewFromDualView threw: "
8847  << e.what () << std::endl;
8848  lclErr = 1;
8849  }
8850  catch (...) {
8851  errStrm << "Proc " << myRank << ": getArrayViewFromDualView threw "
8852  "an exception not a subclass of std::exception" << std::endl;
8853  lclErr = 1;
8854  }
8855 
8856  if (! comm.is_null ()) {
8857  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8858  }
8859  if (gblErr != 0) {
8860  ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8861  TEUCHOS_TEST_FOR_EXCEPTION(
8862  true, std::runtime_error, "getArrayViewFromDualView threw an "
8863  "exception on at least one process.");
8864  }
8865 
8866  if (verbose) {
8867  std::ostringstream os;
8868  os << *verbosePrefix << "Calling packCrsMatrixWithOwningPIDs"
8869  << std::endl;
8870  std::cerr << os.str ();
8871  }
8872  try {
8874  destMat->exports_,
8875  numExportPacketsPerLID,
8876  ExportLIDs,
8877  SourcePids,
8878  constantNumPackets,
8879  Distor);
8880  }
8881  catch (std::exception& e) {
8882  errStrm << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs threw: "
8883  << e.what () << std::endl;
8884  lclErr = 1;
8885  }
8886  catch (...) {
8887  errStrm << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs threw "
8888  "an exception not a subclass of std::exception" << std::endl;
8889  lclErr = 1;
8890  }
8891 
8892  if (verbose) {
8893  std::ostringstream os;
8894  os << *verbosePrefix << "Done with packCrsMatrixWithOwningPIDs"
8895  << std::endl;
8896  std::cerr << os.str ();
8897  }
8898 
8899  if (! comm.is_null ()) {
8900  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8901  }
8902  if (gblErr != 0) {
8903  ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
8904  TEUCHOS_TEST_FOR_EXCEPTION(
8905  true, std::runtime_error, "packCrsMatrixWithOwningPIDs threw an "
8906  "exception on at least one process.");
8907  }
8908  }
8909  else {
8910  // packAndPrepare* methods modify numExportPacketsPerLID_.
8911  destMat->numExportPacketsPerLID_.modify_host ();
8912  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8913  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8914  if (verbose) {
8915  std::ostringstream os;
8916  os << *verbosePrefix << "Calling packCrsMatrixWithOwningPIDs"
8917  << std::endl;
8918  std::cerr << os.str ();
8919  }
8921  destMat->exports_,
8922  numExportPacketsPerLID,
8923  ExportLIDs,
8924  SourcePids,
8925  constantNumPackets,
8926  Distor);
8927  if (verbose) {
8928  std::ostringstream os;
8929  os << *verbosePrefix << "Done with packCrsMatrixWithOwningPIDs"
8930  << std::endl;
8931  std::cerr << os.str ();
8932  }
8933  }
8934 
8935  // Do the exchange of remote data.
8936  if (! communication_needed) {
8937  if (verbose) {
8938  std::ostringstream os;
8939  os << *verbosePrefix << "Communication not needed" << std::endl;
8940  std::cerr << os.str ();
8941  }
8942  }
8943  else {
8944  if (reverseMode) {
8945  if (constantNumPackets == 0) { // variable number of packets per LID
8946  if (verbose) {
8947  std::ostringstream os;
8948  os << *verbosePrefix << "Reverse mode, variable # packets / LID"
8949  << std::endl;
8950  std::cerr << os.str ();
8951  }
8952  // Make sure that host has the latest version, since we're
8953  // using the version on host. If host has the latest
8954  // version, syncing to host does nothing.
8955  destMat->numExportPacketsPerLID_.sync_host ();
8956  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8957  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
8958  destMat->numImportPacketsPerLID_.sync_host ();
8959  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8960  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
8961 
8962  if (verbose) {
8963  std::ostringstream os;
8964  os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits"
8965  << std::endl;
8966  std::cerr << os.str ();
8967  }
8968  Distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
8969  numImportPacketsPerLID);
8970  if (verbose) {
8971  std::ostringstream os;
8972  os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits"
8973  << std::endl;
8974  std::cerr << os.str ();
8975  }
8976 
8977  size_t totalImportPackets = 0;
8978  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8979  totalImportPackets += numImportPacketsPerLID[i];
8980  }
8981 
8982  // Reallocation MUST go before setting the modified flag,
8983  // because it may clear out the flags.
8984  destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8985  verbosePrefix.get ());
8986  destMat->imports_.modify_host ();
8987  Teuchos::ArrayView<char> hostImports =
8988  getArrayViewFromDualView (destMat->imports_);
8989  // This is a legacy host pack/unpack path, so use the host
8990  // version of exports_.
8991  destMat->exports_.sync_host ();
8992  Teuchos::ArrayView<const char> hostExports =
8993  getArrayViewFromDualView (destMat->exports_);
8994  if (verbose) {
8995  std::ostringstream os;
8996  os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaits"
8997  << std::endl;
8998  std::cerr << os.str ();
8999  }
9000  Distor.doReversePostsAndWaits (hostExports,
9001  numExportPacketsPerLID,
9002  hostImports,
9003  numImportPacketsPerLID);
9004  if (verbose) {
9005  std::ostringstream os;
9006  os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaits"
9007  << std::endl;
9008  std::cerr << os.str ();
9009  }
9010  }
9011  else { // constant number of packets per LID
9012  if (verbose) {
9013  std::ostringstream os;
9014  os << *verbosePrefix << "Reverse mode, constant # packets / LID"
9015  << std::endl;
9016  std::cerr << os.str ();
9017  }
9018  destMat->imports_.modify_host ();
9019  Teuchos::ArrayView<char> hostImports =
9020  getArrayViewFromDualView (destMat->imports_);
9021  // This is a legacy host pack/unpack path, so use the host
9022  // version of exports_.
9023  destMat->exports_.sync_host ();
9024  Teuchos::ArrayView<const char> hostExports =
9025  getArrayViewFromDualView (destMat->exports_);
9026  if (verbose) {
9027  std::ostringstream os;
9028  os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits"
9029  << std::endl;
9030  std::cerr << os.str ();
9031  }
9032  Distor.doReversePostsAndWaits (hostExports,
9033  constantNumPackets,
9034  hostImports);
9035  if (verbose) {
9036  std::ostringstream os;
9037  os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits"
9038  << std::endl;
9039  std::cerr << os.str ();
9040  }
9041  }
9042  }
9043  else { // forward mode (the default)
9044  if (constantNumPackets == 0) { // variable number of packets per LID
9045  if (verbose) {
9046  std::ostringstream os;
9047  os << *verbosePrefix << "Forward mode, variable # packets / LID"
9048  << std::endl;
9049  std::cerr << os.str ();
9050  }
9051  // Make sure that host has the latest version, since we're
9052  // using the version on host. If host has the latest
9053  // version, syncing to host does nothing.
9054  destMat->numExportPacketsPerLID_.sync_host ();
9055  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
9056  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
9057  destMat->numImportPacketsPerLID_.sync_host ();
9058  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
9059  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
9060  if (verbose) {
9061  std::ostringstream os;
9062  os << *verbosePrefix << "Calling 3-arg doPostsAndWaits"
9063  << std::endl;
9064  std::cerr << os.str ();
9065  }
9066  Distor.doPostsAndWaits (numExportPacketsPerLID, 1,
9067  numImportPacketsPerLID);
9068  if (verbose) {
9069  std::ostringstream os;
9070  os << *verbosePrefix << "Finished 3-arg doPostsAndWaits"
9071  << std::endl;
9072  std::cerr << os.str ();
9073  }
9074 
9075  size_t totalImportPackets = 0;
9076  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
9077  totalImportPackets += numImportPacketsPerLID[i];
9078  }
9079 
9080  // Reallocation MUST go before setting the modified flag,
9081  // because it may clear out the flags.
9082  destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
9083  verbosePrefix.get ());
9084  destMat->imports_.modify_host ();
9085  Teuchos::ArrayView<char> hostImports =
9086  getArrayViewFromDualView (destMat->imports_);
9087  // This is a legacy host pack/unpack path, so use the host
9088  // version of exports_.
9089  destMat->exports_.sync_host ();
9090  Teuchos::ArrayView<const char> hostExports =
9091  getArrayViewFromDualView (destMat->exports_);
9092  if (verbose) {
9093  std::ostringstream os;
9094  os << *verbosePrefix << "Calling 4-arg doPostsAndWaits"
9095  << std::endl;
9096  std::cerr << os.str ();
9097  }
9098  Distor.doPostsAndWaits (hostExports,
9099  numExportPacketsPerLID,
9100  hostImports,
9101  numImportPacketsPerLID);
9102  if (verbose) {
9103  std::ostringstream os;
9104  os << *verbosePrefix << "Finished 4-arg doPostsAndWaits"
9105  << std::endl;
9106  std::cerr << os.str ();
9107  }
9108  }
9109  else { // constant number of packets per LID
9110  if (verbose) {
9111  std::ostringstream os;
9112  os << *verbosePrefix << "Forward mode, constant # packets / LID"
9113  << std::endl;
9114  std::cerr << os.str ();
9115  }
9116  destMat->imports_.modify_host ();
9117  Teuchos::ArrayView<char> hostImports =
9118  getArrayViewFromDualView (destMat->imports_);
9119  // This is a legacy host pack/unpack path, so use the host
9120  // version of exports_.
9121  destMat->exports_.sync_host ();
9122  Teuchos::ArrayView<const char> hostExports =
9123  getArrayViewFromDualView (destMat->exports_);
9124  if (verbose) {
9125  std::ostringstream os;
9126  os << *verbosePrefix << "Calling 3-arg doPostsAndWaits"
9127  << std::endl;
9128  std::cerr << os.str ();
9129  }
9130  Distor.doPostsAndWaits (hostExports,
9131  constantNumPackets,
9132  hostImports);
9133  if (verbose) {
9134  std::ostringstream os;
9135  os << *verbosePrefix << "Finished 3-arg doPostsAndWaits"
9136  << std::endl;
9137  std::cerr << os.str ();
9138  }
9139  }
9140  }
9141  }
9142 
9143  /*********************************************************************/
9144  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
9145  /*********************************************************************/
9146 
9147  // Backwards compatibility measure. We'll use this again below.
9148  destMat->numImportPacketsPerLID_.sync_host ();
9149  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
9150  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
9151  destMat->imports_.sync_host ();
9152  Teuchos::ArrayView<const char> hostImports =
9153  getArrayViewFromDualView (destMat->imports_);
9154 
9155  if (verbose) {
9156  std::ostringstream os;
9157  os << *verbosePrefix << "Calling unpackAndCombineWithOwningPIDsCount"
9158  << std::endl;
9159  std::cerr << os.str ();
9160  }
9161  size_t mynnz =
9163  RemoteLIDs,
9164  hostImports,
9165  numImportPacketsPerLID,
9166  constantNumPackets,
9167  Distor,
9168  INSERT,
9169  NumSameIDs,
9170  PermuteToLIDs,
9171  PermuteFromLIDs);
9172  if (verbose) {
9173  std::ostringstream os;
9174  os << *verbosePrefix << "unpackAndCombineWithOwningPIDsCount returned "
9175  << mynnz << std::endl;
9176  std::cerr << os.str ();
9177  }
9178  size_t N = BaseRowMap->getNodeNumElements ();
9179 
9180  // Allocations
9181  ArrayRCP<size_t> CSR_rowptr(N+1);
9182  ArrayRCP<GO> CSR_colind_GID;
9183  ArrayRCP<LO> CSR_colind_LID;
9184  ArrayRCP<Scalar> CSR_vals;
9185  CSR_colind_GID.resize (mynnz);
9186  CSR_vals.resize (mynnz);
9187 
9188  // If LO and GO are the same, we can reuse memory when
9189  // converting the column indices from global to local indices.
9190  if (typeid (LO) == typeid (GO)) {
9191  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
9192  }
9193  else {
9194  CSR_colind_LID.resize (mynnz);
9195  }
9196 
9197  if (verbose) {
9198  std::ostringstream os;
9199  os << *verbosePrefix << "Calling unpackAndCombineIntoCrsArrays"
9200  << std::endl;
9201  std::cerr << os.str ();
9202  }
9203  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
9204  // unpackAndCombine method on a "CrsArrays" object? This passing
9205  // in a huge list of arrays is icky. Can't we have a bit of an
9206  // abstraction? Implementing a concrete DistObject subclass only
9207  // takes five methods.
9209  RemoteLIDs,
9210  hostImports,
9211  numImportPacketsPerLID,
9212  constantNumPackets,
9213  Distor,
9214  INSERT,
9215  NumSameIDs,
9216  PermuteToLIDs,
9217  PermuteFromLIDs,
9218  N,
9219  mynnz,
9220  MyPID,
9221  CSR_rowptr (),
9222  CSR_colind_GID (),
9223  Teuchos::av_reinterpret_cast<impl_scalar_type> (CSR_vals ()),
9224  SourcePids (),
9225  TargetPids);
9226 
9227  /**************************************************************/
9228  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
9229  /**************************************************************/
9230  // Call an optimized version of makeColMap that avoids the
9231  // Directory lookups (since the Import object knows who owns all
9232  // the GIDs).
9233  Teuchos::Array<int> RemotePids;
9234  if (verbose) {
9235  std::ostringstream os;
9236  os << *verbosePrefix << "Calling lowCommunicationMakeColMapAndReindex"
9237  << std::endl;
9238  std::cerr << os.str ();
9239  }
9240  Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
9241  CSR_colind_LID (),
9242  CSR_colind_GID (),
9243  BaseDomainMap,
9244  TargetPids,
9245  RemotePids,
9246  MyColMap);
9247 
9248  if (verbose) {
9249  std::ostringstream os;
9250  os << *verbosePrefix << "restrictComm="
9251  << (restrictComm ? "true" : "false") << std::endl;
9252  std::cerr << os.str ();
9253  }
9254 
9255  /*******************************************************/
9256  /**** 4) Second communicator restriction phase ****/
9257  /*******************************************************/
9258  if (restrictComm) {
9259  ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
9260  ReducedRowMap :
9261  MyColMap->replaceCommWithSubset (ReducedComm);
9262  MyColMap = ReducedColMap; // Reset the "my" maps
9263  }
9264 
9265  // Replace the col map
9266  if (verbose) {
9267  std::ostringstream os;
9268  os << *verbosePrefix << "Calling replaceColMap" << std::endl;
9269  std::cerr << os.str ();
9270  }
9271  destMat->replaceColMap (MyColMap);
9272 
9273  // Short circuit if the processor is no longer in the communicator
9274  //
9275  // NOTE: Epetra modifies all "removed" processes so they
9276  // have a dummy (serial) Map that doesn't touch the original
9277  // communicator. Duplicating that here might be a good idea.
9278  if (ReducedComm.is_null ()) {
9279  if (verbose) {
9280  std::ostringstream os;
9281  os << *verbosePrefix << "I am no longer in the communicator; "
9282  "returning" << std::endl;
9283  std::cerr << os.str ();
9284  }
9285  return;
9286  }
9287 
9288  /***************************************************/
9289  /**** 5) Sort ****/
9290  /***************************************************/
9291  if ((! reverseMode && xferAsImport != NULL) ||
9292  (reverseMode && xferAsExport != NULL)) {
9293  if (verbose) {
9294  std::ostringstream os;
9295  os << *verbosePrefix << "Calling sortCrsEntries" << std::endl;
9296  std::cerr << os.str ();
9297  }
9298  Import_Util::sortCrsEntries (CSR_rowptr (),
9299  CSR_colind_LID (),
9300  CSR_vals ());
9301  }
9302  else if ((! reverseMode && xferAsExport != NULL) ||
9303  (reverseMode && xferAsImport != NULL)) {
9304  if (verbose) {
9305  std::ostringstream os;
9306  os << *verbosePrefix << "Calling sortAndMergeCrsEntries" << std::endl;
9307  std::cerr << os.str ();
9308  }
9309  Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
9310  CSR_colind_LID (),
9311  CSR_vals ());
9312  if (CSR_rowptr[N] != mynnz) {
9313  CSR_colind_LID.resize (CSR_rowptr[N]);
9314  CSR_vals.resize (CSR_rowptr[N]);
9315  }
9316  }
9317  else {
9318  TEUCHOS_TEST_FOR_EXCEPTION(
9319  true, std::logic_error, "Tpetra::CrsMatrix::"
9320  "transferAndFillComplete: Should never get here! "
9321  "Please report this bug to a Tpetra developer.");
9322  }
9323  /***************************************************/
9324  /**** 6) Reset the colmap and the arrays ****/
9325  /***************************************************/
9326 
9327  if (verbose) {
9328  std::ostringstream os;
9329  os << *verbosePrefix << "Calling destMat->setAllValues" << std::endl;
9330  std::cerr << os.str ();
9331  }
9332 
9333  // Call constructor for the new matrix (restricted as needed)
9334  //
9335  // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
9336  // refactor version of CrsMatrix, though it reserves the right to
9337  // make a deep copy of the arrays.
9338  destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
9339 
9340  /***************************************************/
9341  /**** 7) Build Importer & Call ESFC ****/
9342  /***************************************************/
9343  // Pre-build the importer using the existing PIDs
9344  Teuchos::ParameterList esfc_params;
9345 
9346  RCP<import_type> MyImport;
9347 
9348  // Fulfill the non-blocking allreduce on reduced_mismatch.
9349  if (iallreduceRequest.get () != nullptr) {
9350  if (verbose) {
9351  std::ostringstream os;
9352  os << *verbosePrefix << "Calling iallreduceRequest->wait()" << std::endl;
9353  std::cerr << os.str ();
9354  }
9355  iallreduceRequest->wait ();
9356  if (reduced_mismatch != 0) {
9357  isMM = false;
9358  }
9359  }
9360 
9361  if( isMM ) {
9362 #ifdef HAVE_TPETRA_MMM_TIMINGS
9363  Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string("isMM Block")));
9364 #endif
9365  // Combine all type1/2/3 lists, [filter them], then call the expert import constructor.
9366 
9367  if (verbose) {
9368  std::ostringstream os;
9369  os << *verbosePrefix << "Calling getAllValues" << std::endl;
9370  std::cerr << os.str ();
9371  }
9372 
9373  Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
9374  Teuchos::ArrayRCP<int> type3PIDs;
9375  Teuchos::ArrayRCP<const size_t> rowptr;
9376  Teuchos::ArrayRCP<const LO> colind;
9377  Teuchos::ArrayRCP<const Scalar> vals;
9378  {
9379 #ifdef HAVE_TPETRA_MMM_TIMINGS
9380  TimeMonitor tm_getAllValues (*TimeMonitor::getNewTimer(prefix + std::string("isMMgetAllValues")));
9381 #endif
9382  getAllValues(rowptr,colind,vals);
9383  }
9384 
9385  if (verbose) {
9386  std::ostringstream os;
9387  os << *verbosePrefix << "Calling reverseNeighborDiscovery" << std::endl;
9388  std::cerr << os.str ();
9389  }
9390 
9391  {
9392 #ifdef HAVE_TPETRA_MMM_TIMINGS
9393  TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string("isMMrevNeighDis")));
9394 #endif
9395  Import_Util::reverseNeighborDiscovery(*this,
9396  rowptr,
9397  colind,
9398  rowTransfer,
9399  MyImporter,
9400  MyDomainMap,
9401  type3PIDs,
9402  type3LIDs,
9403  ReducedComm);
9404  }
9405 
9406  if (verbose) {
9407  std::ostringstream os;
9408  os << *verbosePrefix << "Done with reverseNeighborDiscovery" << std::endl;
9409  std::cerr << os.str ();
9410  }
9411 
9412  Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
9413  Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportLIDs();
9414 
9415  Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs(); // row matrix
9416  Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
9417 
9418  const int numCols = getGraph()->getColMap()->getNodeNumElements(); // may be dup
9419  // from EpetraExt_MMHelpers.cpp: build_type2_exports
9420  std::vector<bool> IsOwned(numCols,true);
9421  std::vector<int> SentTo(numCols,-1);
9422  if (! MyImporter.is_null ()) {
9423  for (auto && rlid : MyImporter->getRemoteLIDs()) { // the remoteLIDs must be from sourcematrix
9424  IsOwned[rlid]=false;
9425  }
9426  }
9427 
9428  std::vector<std::pair<int,GO> > usrtg;
9429  usrtg.reserve(TEPID2.size());
9430 
9431  {
9432  const auto& colMap = * (this->getColMap ()); // *this is sourcematrix
9433  for (Array_size_type i = 0; i < TEPID2.size (); ++i) {
9434  const LO row = TELID2[i];
9435  const int pid = TEPID2[i];
9436  for (auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
9437  const int col = colind[j];
9438  if (IsOwned[col] && SentTo[col] != pid) {
9439  SentTo[col] = pid;
9440  GO gid = colMap.getGlobalElement (col);
9441  usrtg.push_back (std::pair<int,GO> (pid, gid));
9442  }
9443  }
9444  }
9445  }
9446 
9447 // This sort can _not_ be omitted.
9448  std::sort(usrtg.begin(),usrtg.end()); // default comparator does the right thing, now sorted in gid order
9449  auto eopg = std ::unique(usrtg.begin(),usrtg.end());
9450  // 25 Jul 2018: Could just ignore the entries at and after eopg.
9451  usrtg.erase(eopg,usrtg.end());
9452 
9453  const Array_size_type type2_us_size = usrtg.size();
9454  Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(new int[type2_us_size],0,type2_us_size,true);
9455  Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(new LO[type2_us_size],0,type2_us_size,true);
9456 
9457  int pos=0;
9458  for(auto && p : usrtg) {
9459  EPID2[pos]= p.first;
9460  ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
9461  pos++;
9462  }
9463 
9464  Teuchos::ArrayView<int> EPID3 = type3PIDs();
9465  Teuchos::ArrayView< LO> ELID3 = type3LIDs();
9466  GO InfGID = std::numeric_limits<GO>::max();
9467  int InfPID = INT_MAX;
9468 #ifdef TPETRA_MIN3
9469 # undef TPETRA_MIN3
9470 #endif // TPETRA_MIN3
9471 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9472  int i1=0, i2=0, i3=0;
9473  int Len1 = EPID1.size();
9474  int Len2 = EPID2.size();
9475  int Len3 = EPID3.size();
9476 
9477  int MyLen=Len1+Len2+Len3;
9478  Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(new LO[MyLen],0,MyLen,true);
9479  Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(new int[MyLen],0,MyLen,true);
9480  int iloc = 0; // will be the size of the userExportLID/PIDs
9481 
9482  while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9483  int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9484  int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9485  int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9486 
9487  GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9488  GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9489  GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9490 
9491  int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9492  GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9493 #ifdef TPETRA_MIN3
9494 # undef TPETRA_MIN3
9495 #endif // TPETRA_MIN3
9496  bool added_entry=false;
9497 
9498  if(PID1 == MIN_PID && GID1 == MIN_GID){
9499  userExportLIDs[iloc]=ELID1[i1];
9500  userExportPIDs[iloc]=EPID1[i1];
9501  i1++;
9502  added_entry=true;
9503  iloc++;
9504  }
9505  if(PID2 == MIN_PID && GID2 == MIN_GID){
9506  if(!added_entry) {
9507  userExportLIDs[iloc]=ELID2[i2];
9508  userExportPIDs[iloc]=EPID2[i2];
9509  added_entry=true;
9510  iloc++;
9511  }
9512  i2++;
9513  }
9514  if(PID3 == MIN_PID && GID3 == MIN_GID){
9515  if(!added_entry) {
9516  userExportLIDs[iloc]=ELID3[i3];
9517  userExportPIDs[iloc]=EPID3[i3];
9518  iloc++;
9519  }
9520  i3++;
9521  }
9522  }
9523 
9524  if (verbose) {
9525  std::ostringstream os;
9526  os << *verbosePrefix << "Create Import" << std::endl;
9527  std::cerr << os.str ();
9528  }
9529 
9530 #ifdef HAVE_TPETRA_MMM_TIMINGS
9531  auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string("isMMIportCtor")));
9532 #endif
9533  Teuchos::RCP<Teuchos::ParameterList> plist = rcp(new Teuchos::ParameterList());
9534  // 25 Jul 2018: Test for equality with the non-isMM path's Import object.
9535  MyImport = rcp ( new import_type (MyDomainMap,
9536  MyColMap,
9537  RemotePids,
9538  userExportLIDs.view(0,iloc).getConst(),
9539  userExportPIDs.view(0,iloc).getConst(),
9540  plist)
9541  );
9542 
9543  if (verbose) {
9544  std::ostringstream os;
9545  os << *verbosePrefix << "Call expertStaticFillComplete" << std::endl;
9546  std::cerr << os.str ();
9547  }
9548 
9549  {
9550 #ifdef HAVE_TPETRA_MMM_TIMINGS
9551  TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string("isMM::destMat->eSFC")));
9552  esfc_params.set("Timer Label",label+std::string("isMM eSFC"));
9553 #endif
9554  if(!params.is_null())
9555  esfc_params.set("compute global constants",params->get("compute global constants",true));
9556  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(new Teuchos::ParameterList(esfc_params)));
9557 
9558  }
9559 
9560  } // if(isMM)
9561  else {
9562 #ifdef HAVE_TPETRA_MMM_TIMINGS
9563  TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string("TAFC notMMblock")));
9564 #endif
9565  if (verbose) {
9566  std::ostringstream os;
9567  os << *verbosePrefix << "Create Import" << std::endl;
9568  std::cerr << os.str ();
9569  }
9570 
9571 #ifdef HAVE_TPETRA_MMM_TIMINGS
9572  TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC notMMCreateImporter")));
9573 #endif
9574  Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(new Teuchos::ParameterList);
9575  mypars->set("Timer Label","notMMFrom_tAFC");
9576  MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9577 
9578  if (verbose) {
9579  std::ostringstream os;
9580  os << *verbosePrefix << "Call expertStaticFillComplete" << std::endl;
9581  std::cerr << os.str ();
9582  }
9583 
9584 #ifdef HAVE_TPETRA_MMM_TIMINGS
9585  TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string("notMMdestMat->expertStaticFillComplete")));
9586  esfc_params.set("Timer Label",prefix+std::string("notMM eSFC"));
9587 #else
9588  esfc_params.set("Timer Label",std::string("notMM eSFC"));
9589 #endif
9590 
9591  if (!params.is_null ()) {
9592  esfc_params.set ("compute global constants",
9593  params->get ("compute global constants", true));
9594  }
9595  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9596  MyImport, Teuchos::null,
9597  rcp (new Teuchos::ParameterList (esfc_params)));
9598  }
9599 
9600  if (verbose) {
9601  std::ostringstream os;
9602  os << *verbosePrefix << "Done!" << std::endl;
9603  std::cerr << os.str ();
9604  }
9605  }
9606 
9607 
9608  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9609  void
9612  const import_type& importer,
9613  const Teuchos::RCP<const map_type>& domainMap,
9614  const Teuchos::RCP<const map_type>& rangeMap,
9615  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9616  {
9617  transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
9618  }
9619 
// Two-Import convenience overload (presumably
// CrsMatrix::importAndFillComplete): forwards to transferAndFillComplete
// with rowImporter as the row transfer and domainImporter as the domain
// transfer.
// NOTE(review): the embedded listing numbers jump from 9621 to 9624, so the
// lines holding the qualified function name and the destMatrix parameter are
// absent from this extract; destMatrix used below is declared on them.
9620  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9621  void
9624  const import_type& rowImporter,
9625  const import_type& domainImporter,
9626  const Teuchos::RCP<const map_type>& domainMap,
9627  const Teuchos::RCP<const map_type>& rangeMap,
9628  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9629  {
      // domainImporter arrives by reference but the callee takes an RCP, so it
      // is wrapped with Teuchos::rcpFromRef (presumably non-owning; confirm
      // against the Teuchos documentation).
9630  transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
9631  }
9632 
// Single-Export convenience overload (presumably
// CrsMatrix::exportAndFillComplete): forwards every argument to
// transferAndFillComplete, passing Teuchos::null where the two-Export
// overload passes its domain Export.
// NOTE(review): the embedded listing numbers jump from 9634 to 9637, so the
// lines holding the qualified function name and the destMatrix parameter are
// absent from this extract; destMatrix used below is declared on them.
9633  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9634  void
9637  const export_type& exporter,
9638  const Teuchos::RCP<const map_type>& domainMap,
9639  const Teuchos::RCP<const map_type>& rangeMap,
9640  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9641  {
      // No separate domain transfer is supplied, hence the Teuchos::null third argument.
9642  transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
9643  }
9644 
// Two-Export convenience overload (presumably
// CrsMatrix::exportAndFillComplete): forwards to transferAndFillComplete
// with rowExporter as the row transfer and domainExporter as the domain
// transfer.
// NOTE(review): the embedded listing numbers jump from 9646 to 9649, so the
// lines holding the qualified function name and the destMatrix parameter are
// absent from this extract; destMatrix used below is declared on them.
9645  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9646  void
9649  const export_type& rowExporter,
9650  const export_type& domainExporter,
9651  const Teuchos::RCP<const map_type>& domainMap,
9652  const Teuchos::RCP<const map_type>& rangeMap,
9653  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9654  {
      // domainExporter arrives by reference but the callee takes an RCP, so it
      // is wrapped with Teuchos::rcpFromRef (presumably non-owning; confirm
      // against the Teuchos documentation).
9655  transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
9656  }
9657 
9658 
9659 } // namespace Tpetra
9660 
9661 //
9662 // Explicit instantiation macro
9663 //
9664 // Must be expanded from within the Tpetra namespace!
9665 //
9666 
// Explicitly instantiates the CrsMatrix class template for
// (SCALAR, LO, GO, NODE), and additionally its member template
// convert<SCALAR>() const, i.e. conversion to the same Scalar type.
// Comments must stay above the #define: a // comment on a backslash-continued
// line would swallow the continuation.
9667 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9668  \
9669  template class CrsMatrix< SCALAR , LO , GO , NODE >; \
9670  template Teuchos::RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
9671  CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
9672 
// Explicitly instantiates CrsMatrix<SI, LO, GO, NODE>::convert<SO>() const,
// whose return type is Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE>>.
// SI is the Scalar type of the matrix convert() is called on; SO is the
// Scalar type of the matrix it returns.
9673 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9674  \
9675  template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9676  CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
9677 
// Emits a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>:
// the form taking a source matrix, one Import (`importer`), a domain Map,
// a range Map and a ParameterList, and returning an RCP to the matrix type.
// The Import and Map template arguments are spelled through the matrix's
// local_ordinal_type / global_ordinal_type / node_type typedefs.
// NOTE(review): `template<>` followed by a declaration is explicit
// specialization syntax, not explicit instantiation syntax (plain
// `template`); confirm this is what is intended.
9678 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9679  template<> \
9680  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9681  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9682  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9683  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9684  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9685  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9686  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9687  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9688  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9689  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9690  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9691  const Teuchos::RCP<Teuchos::ParameterList>& params);
9692 
// Emits a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>:
// the form taking a source matrix, two Imports (`rowImporter` and
// `domainImporter`), a domain Map, a range Map and a ParameterList, and
// returning an RCP to the matrix type.
// NOTE(review): `template<>` followed by a declaration is explicit
// specialization syntax, not explicit instantiation syntax (plain
// `template`); confirm this is what is intended.
9693 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9694  template<> \
9695  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9696  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9697  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9698  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9699  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9700  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9701  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9702  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9703  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9704  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9705  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9706  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9707  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9708  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9709  const Teuchos::RCP<Teuchos::ParameterList>& params);
9710 
9711 
// Emits a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>:
// the form taking a source matrix, one Export (`exporter`), a domain Map,
// a range Map and a ParameterList, and returning an RCP to the matrix type.
// NOTE(review): `template<>` followed by a declaration is explicit
// specialization syntax, not explicit instantiation syntax (plain
// `template`); confirm this is what is intended.
9712 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9713  template<> \
9714  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9715  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9716  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9717  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9718  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9719  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9720  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9721  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9722  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9723  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9724  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9725  const Teuchos::RCP<Teuchos::ParameterList>& params);
9726 
// Emits a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>:
// the form taking a source matrix, two Exports (`rowExporter` and
// `domainExporter`), a domain Map, a range Map and a ParameterList, and
// returning an RCP to the matrix type.
// NOTE(review): `template<>` followed by a declaration is explicit
// specialization syntax, not explicit instantiation syntax (plain
// `template`); confirm this is what is intended.
9727 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9728  template<> \
9729  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9730  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9731  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9732  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9733  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9734  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9735  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9736  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9737  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9738  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9739  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9740  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9741  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9742  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9743  const Teuchos::RCP<Teuchos::ParameterList>& params);
9744 
9745 
// Umbrella macro: for one (SCALAR, LO, GO, NODE) combination, expands
// TPETRA_CRSMATRIX_MATRIX_INSTANT followed by the four
// import/export-and-fill-complete macros (single-transfer and _TWO forms).
// TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion.
9746 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9747  TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9748  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9749  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9750  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9751  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9752 
9753 #endif // TPETRA_CRSMATRIX_DEF_HPP
Teuchos::ArrayRCP< Teuchos::Array< local_ordinal_type > > lclInds2D_
Local column indices for all rows.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
static int TAFC_OptimizationCoreCount()
The core count above which Tpetra::CrsMatrix::transferAndFillComplete will attempt to do advanced nei...
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
size_t getNodeNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
virtual bool isLocallyIndexed() const =0
Whether matrix indices are locally indexed.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
LocalOrdinal getViewRaw(impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Nonconst pointer to all entries (including extra space) in the given row.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
LocalOrdinal replaceLocalValues(const LocalOrdinal localRow, const typename UnmanagedView< LocalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals) const
Replace one or more entries' values, using local row and column indices.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void getLocalRowCopy(LocalOrdinal localRow, const Teuchos::ArrayView< LocalOrdinal > &colInds, const Teuchos::ArrayView< Scalar > &vals, size_t &numEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void clear_sync_state()
Clear "modified" flags on both host and device sides.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
One or more distributed dense vectors.
size_t getNodeNumEntries() const override
The local number of entries in this matrix.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
local_matrix_type lclMatrix_
The local sparse matrix.
bool isDistributed() const
Whether this is a globally distributed object.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
LocalOrdinal sumIntoLocalValues(const LocalOrdinal localRow, const typename UnmanagedView< LocalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals, const bool atomic=useAtomicUpdatesByDefault) const
Sum into one or more sparse matrix entries, using local row and column indices.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
bool isGloballyIndexed() const override
Whether the graph&#39;s column indices are stored as global indices.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
void gaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
"Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = A X$.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply() and gaussSeidel().
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
virtual void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const =0
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
void clearGlobalConstants()
Clear matrix properties that require collectives.
void gaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
LocalOrdinal getLocalRowViewRaw(const LocalOrdinal lclRow, LocalOrdinal &numEnt, const LocalOrdinal *&lclColInds, const Scalar *&vals) const override
Get a constant, nonpersisting, locally indexed view of the given row of the matrix, using "raw" pointers instead of Teuchos::ArrayView.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas)
Allocate values (and optionally indices) using the Node.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
Insert new values that don't currently exist.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets, Distributor &dist) const
Pack this object's data for an Import or Export.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
virtual size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const =0
The current number of entries on the calling process in the specified global row. ...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
Sets up and executes a communication plan for a Tpetra DistObject.
mag_type frobNorm_
Cached Frobenius norm of the (global) matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
void computeGlobalConstants()
Compute matrix properties that require collectives.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void modify_host()
Mark data as modified on the host side.
bool isLocallyComplete() const
Is this Export or Import locally complete?
local_matrix_type::values_type getLocalValuesView() const
Get the Kokkos local values.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
ProfileType getProfileType() const
Returns the profile type, which indicates whether the matrix was allocated with static data structures.
void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using local column indices.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
A parallel distribution of indices over processes.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< const impl_scalar_type > getView(RowInfo rowinfo) const
Constant view of all entries (including extra space) in the given row.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A read-only, row-oriented interface to a sparse matrix.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const override
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
size_t mergeRowIndicesAndValues(crs_graph_type &graph, const RowInfo &rowInfo)
Merge duplicate row indices in the given row, along with their corresponding values.
LocalOrdinal replaceGlobalValues(const GlobalOrdinal globalRow, const typename UnmanagedView< GlobalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals) const
Replace one or more entries' values, using global indices.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
void reorderedGaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
Reordered "Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = AX$.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Teuchos::ArrayView< impl_scalar_type > getViewNonConst(const RowInfo &rowinfo) const
Nonconst view of all entries (including extra space) in the given row.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
::Tpetra::Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
LocalOrdinal getViewRawConst(const impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Const pointer to all entries (including extra space) in the given row.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
void sync_device()
Synchronize to Device.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.