Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_CrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSMATRIX_DEF_HPP
41 #define TPETRA_CRSMATRIX_DEF_HPP
42 
50 
51 #include "Tpetra_Import_Util.hpp"
52 #include "Tpetra_Import_Util2.hpp"
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_LocalCrsMatrixOperator.hpp"
55 
62 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
67 #include "KokkosSparse_getDiagCopy.hpp"
71 #include "Tpetra_Details_packCrsMatrix.hpp"
72 #include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
74 #include "Teuchos_FancyOStream.hpp"
75 #include "Teuchos_RCP.hpp"
76 #include "Teuchos_DataAccess.hpp"
77 #include "Teuchos_SerialDenseMatrix.hpp" // unused here, could delete
78 
79 #include <memory>
80 #include <sstream>
81 #include <typeinfo>
82 #include <utility>
83 #include <vector>
84 
85 using Teuchos::rcpFromRef;
86 
87 namespace Tpetra {
88 
89 namespace { // (anonymous)
90 
91  template<class T, class BinaryFunction>
92  T atomic_binary_function_update (volatile T* const dest,
93  const T& inputVal,
94  BinaryFunction f)
95  {
96  T oldVal = *dest;
97  T assume;
98 
99  // NOTE (mfh 30 Nov 2015) I do NOT need a fence here for IBM
100  // POWER architectures, because 'newval' depends on 'assume',
101  // which depends on 'oldVal', which depends on '*dest'. This
102  // sets up a chain of read dependencies that should ensure
103  // correct behavior given a sane memory model.
104  do {
105  assume = oldVal;
106  T newVal = f (assume, inputVal);
107  oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
108  } while (assume != oldVal);
109 
110  return oldVal;
111  }
112 } // namespace (anonymous)
113 
114 //
115 // Users must never rely on anything in the Details namespace.
116 //
117 namespace Details {
118 
128 template<class Scalar>
129 struct AbsMax {
131  Scalar operator() (const Scalar& x, const Scalar& y) {
132  typedef Teuchos::ScalarTraits<Scalar> STS;
133  return std::max (STS::magnitude (x), STS::magnitude (y));
134  }
135 };
136 
137 } // namespace Details
138 } // namespace Tpetra
139 
140 namespace Tpetra {
141 
// Constructor taking (rowMap, maxNumEntriesPerRow, pftype, params).
//
// Initializes the dist_object_type base with rowMap, builds a new
// crs_graph_type from the same four arguments, stores it in both
// myGraph_ and staticGraph_ (so the matrix owns its graph), and then
// calls resumeFill(params).  A std::exception escaping the CrsGraph
// constructor is caught and re-reported through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC as std::runtime_error, with
// e.what() appended to the message.
//
// NOTE(review): this listing skips its own line 169 (between the
// resumeFill call and the closing brace); the statement there, if any,
// is not visible here -- confirm against the real file.
142  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
143  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
144  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
145  size_t maxNumEntriesPerRow,
146  const ProfileType pftype,
147  const Teuchos::RCP<Teuchos::ParameterList>& params) :
148  dist_object_type (rowMap)
149  {
// tfecfFuncName is not referenced by name in the visible code;
// presumably the TEUCHOS_..._CLASS_FUNC macro picks it up -- confirm.
150  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, size_t, "
151  "ProfileType[, RCP<ParameterList>]): ";
152  Teuchos::RCP<crs_graph_type> graph;
153  try {
154  graph = Teuchos::rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
155  pftype, params));
156  }
157  catch (std::exception& e) {
158  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
159  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
160  "size_t, ProfileType[, RCP<ParameterList>]) threw an exception: "
161  << e.what ());
162  }
163  // myGraph_ not null means that the matrix owns the graph. That's
164  // different than the const CrsGraph constructor, where the matrix
165  // does _not_ own the graph.
166  myGraph_ = graph;
167  staticGraph_ = myGraph_;
168  resumeFill (params);
170  }
171 
// Constructor taking (rowMap, numEntPerRowToAlloc, pftype, params).
//
// Same pattern as the (rowMap, size_t, ...) constructor, but the
// per-row allocation hint is an ArrayView of counts that is forwarded
// unchanged to the crs_graph_type constructor.  The new graph is
// stored in both myGraph_ and staticGraph_, then resumeFill(params) is
// called.  A std::exception from the CrsGraph constructor is
// re-reported as std::runtime_error with e.what() appended.
//
// NOTE(review): this listing skips its own lines 173 (presumably the
// "CrsMatrix<...>::" qualifier) and 201 (just before the closing
// brace) -- confirm against the real file.
172  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
174  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
175  const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
176  const ProfileType pftype,
177  const Teuchos::RCP<Teuchos::ParameterList>& params) :
178  dist_object_type (rowMap)
179  {
180  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
181  "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]): ";
182  Teuchos::RCP<crs_graph_type> graph;
183  try {
184  using Teuchos::rcp;
185  graph = rcp(new crs_graph_type(rowMap, numEntPerRowToAlloc,
186  pftype, params));
187  }
188  catch (std::exception& e) {
189  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
190  (true, std::runtime_error, "CrsGraph constructor "
191  "(RCP<const Map>, ArrayView<const size_t>, "
192  "ProfileType[, RCP<ParameterList>]) threw an exception: "
193  << e.what ());
194  }
195  // myGraph_ not null means that the matrix owns the graph. That's
196  // different than the const CrsGraph constructor, where the matrix
197  // does _not_ own the graph.
198  myGraph_ = graph;
199  staticGraph_ = graph;
200  resumeFill (params);
202  }
203 
204 
// Constructor taking (rowMap, colMap, maxNumEntPerRow, pftype, params).
//
// Builds a new crs_graph_type from (rowMap, colMap, maxNumEntPerRow,
// pftype, params), stores it in both myGraph_ and staticGraph_, and
// calls resumeFill(params).  Before creating the graph it checks that
// staticGraph_ and myGraph_ are still null and reports
// std::logic_error otherwise.  A std::exception from the CrsGraph
// constructor is re-reported as std::runtime_error with e.what()
// appended.
//
// NOTE(review): this listing skips its own lines 206 (presumably the
// "CrsMatrix<...>::" qualifier) and 246 (just before the closing
// brace) -- confirm against the real file.
205  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
207  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
208  const Teuchos::RCP<const map_type>& colMap,
209  const size_t maxNumEntPerRow,
210  const ProfileType pftype,
211  const Teuchos::RCP<Teuchos::ParameterList>& params) :
212  dist_object_type (rowMap)
213  {
214  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
215  "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]): ";
216  const char suffix[] =
217  " Please report this bug to the Tpetra developers.";
218 
219  // An artifact of debugging something a while back.
// Both members were not touched by the initializer list above, so
// these checks only verify their default-constructed state.
220  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
221  (! staticGraph_.is_null (), std::logic_error,
222  "staticGraph_ is not null at the beginning of the constructor."
223  << suffix);
224  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
225  (! myGraph_.is_null (), std::logic_error,
226  "myGraph_ is not null at the beginning of the constructor."
227  << suffix);
228  Teuchos::RCP<crs_graph_type> graph;
229  try {
230  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
231  maxNumEntPerRow,
232  pftype, params));
233  }
234  catch (std::exception& e) {
235  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
236  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
237  "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]) threw an "
238  "exception: " << e.what ());
239  }
240  // myGraph_ not null means that the matrix owns the graph. That's
241  // different than the const CrsGraph constructor, where the matrix
242  // does _not_ own the graph.
243  myGraph_ = graph;
244  staticGraph_ = myGraph_;
245  resumeFill (params);
247  }
248 
// Constructor taking (rowMap, colMap, numEntPerRowToAlloc, pftype,
// params).
//
// Builds a new crs_graph_type from the same five arguments, stores it
// in both myGraph_ and staticGraph_ (the matrix owns the graph), and
// calls resumeFill(params).  A std::exception from the CrsGraph
// constructor is re-reported as std::runtime_error with e.what()
// appended.
//
// NOTE(review): this listing skips its own lines 250 (presumably the
// "CrsMatrix<...>::" qualifier) and 279 (just before the closing
// brace) -- confirm against the real file.
249  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
251  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
252  const Teuchos::RCP<const map_type>& colMap,
253  const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
254  const ProfileType pftype,
255  const Teuchos::RCP<Teuchos::ParameterList>& params) :
256  dist_object_type (rowMap)
257  {
258  const char tfecfFuncName[] =
259  "CrsMatrix(RCP<const Map>, RCP<const Map>, "
260  "ArrayView<const size_t>, ProfileType[, RCP<ParameterList>]): ";
261  Teuchos::RCP<crs_graph_type> graph;
262  try {
263  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
264  numEntPerRowToAlloc,
265  pftype, params));
266  }
267  catch (std::exception& e) {
268  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
269  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
270  "RCP<const Map>, ArrayView<const size_t>, ProfileType[, "
271  "RCP<ParameterList>]) threw an exception: " << e.what ());
272  }
273  // myGraph_ not null means that the matrix owns the graph. That's
274  // different than the const CrsGraph constructor, where the matrix
275  // does _not_ own the graph.
276  myGraph_ = graph;
277  staticGraph_ = graph;
278  resumeFill (params);
280  }
281 
282 
// Constructor taking an existing (const) graph and an unused
// ParameterList.
//
// The matrix shares the caller's graph through staticGraph_; myGraph_
// is not assigned in the visible code.  The constructor requires a
// non-null, fill-complete graph (std::runtime_error otherwise),
// allocates a values array with one slot per entry of the graph's
// local graph, wraps values + local graph in a local_matrix_type,
// stores that in lclMatrix_ via a local_multiply_op_type, and keeps a
// second handle to the values in k_values1D_.  When
// Details::Behavior::verbose("CrsMatrix") is true, progress lines are
// written to std::cerr.
//
// NOTE(review): the member initializer below already dereferences
// 'graph' (graph->getRowMap ()), so the "Input graph is null" check in
// the body runs only after that dereference.
//
// NOTE(review): this listing skips its own lines 284 (presumably the
// "CrsMatrix<...>::" qualifier) and 347 (after the k_values1D_
// assignment) -- confirm against the real file.
283  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
285  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
286  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
287  dist_object_type (graph->getRowMap ()),
288  staticGraph_ (graph),
289  storageStatus_ (Details::STORAGE_1D_PACKED)
290  {
291  using std::endl;
292  typedef typename local_matrix_type::values_type values_type;
293  const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>[, "
294  "RCP<ParameterList>]): ";
295  const bool verbose = Details::Behavior::verbose("CrsMatrix");
296 
// 'prefix' is only set (and only dereferenced) when verbose is true.
297  std::unique_ptr<std::string> prefix;
298  if (verbose) {
299  prefix = this->createPrefix("CrsMatrix", "CrsMatrix(graph,params)");
300  std::ostringstream os;
301  os << *prefix << "Start" << endl;
302  std::cerr << os.str ();
303  }
304 
305  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
306  (graph.is_null (), std::runtime_error, "Input graph is null.");
307  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
308  (! graph->isFillComplete (), std::runtime_error, "Input graph "
309  "is not fill complete. You must call fillComplete on the "
310  "graph before using it to construct a CrsMatrix. Note that "
311  "calling resumeFill on the graph makes it not fill complete, "
312  "even if you had previously called fillComplete. In that "
313  "case, you must call fillComplete on the graph again.");
314 
315  // The graph is fill complete, so it is locally indexed and has a
316  // fixed structure. This means we can allocate the (1-D) array of
317  // values and build the local matrix right now. Note that the
318  // local matrix's number of columns comes from the column Map, not
319  // the domain Map.
320 
321  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
322  auto lclGraph = graph->getLocalGraph ();
323  const size_t numEnt = lclGraph.entries.extent (0);
324  if (verbose) {
325  std::ostringstream os;
326  os << *prefix << "Allocate values: " << numEnt << endl;
327  std::cerr << os.str ();
328  }
329  values_type val ("Tpetra::CrsMatrix::val", numEnt);
330 
331  auto lclMat = std::make_shared<local_matrix_type>
332  ("Tpetra::CrsMatrix::lclMatrix_", numCols, val, lclGraph);
333  lclMatrix_ = std::make_shared<local_multiply_op_type> (lclMat);
334 
335  // FIXME (22 Jun 2016) I would very much like to get rid of
336  // k_values1D_ at some point. I find it confusing to have all
337  // these extra references lying around.
338  if (verbose) {
339  std::ostringstream os;
340  os << *prefix << "Assign k_values1D_: old="
341  << k_values1D_.extent(0) << ", new="
342  << lclMat->values.extent(0) << endl;
343  std::cerr << os.str ();
344  }
345  k_values1D_ = lclMat->values;
346 
348 
349  if (verbose) {
350  std::ostringstream os;
351  os << *prefix << "Done" << endl;
352  std::cerr << os.str ();
353  }
354  }
355 
// Constructor taking an existing (const) graph, a caller-supplied
// values array, and an unused ParameterList.
//
// Like the (graph, params) constructor, except that no values array is
// allocated here: the caller's 'values' is handed straight to the
// local_matrix_type constructor.  Requires a non-null, fill-complete
// graph (std::runtime_error otherwise).  In the visible code the size
// of 'values' is not compared against the graph's entry count.
//
// NOTE(review): the member initializer below already dereferences
// 'graph' (graph->getRowMap ()), so the "Input graph is null" check in
// the body runs only after that dereference.
//
// NOTE(review): this listing skips its own lines 357 (presumably the
// "CrsMatrix<...>::" qualifier) and 396 (after the k_values1D_
// assignment) -- confirm against the real file.
356  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
358  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
359  const typename local_matrix_type::values_type& values,
360  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
361  dist_object_type (graph->getRowMap ()),
362  staticGraph_ (graph),
363  storageStatus_ (Details::STORAGE_1D_PACKED)
364  {
365  const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>, "
366  "local_matrix_type::values_type, "
367  "[,RCP<ParameterList>]): ";
368  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
369  (graph.is_null (), std::runtime_error, "Input graph is null.");
370  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
371  (! graph->isFillComplete (), std::runtime_error, "Input graph "
372  "is not fill complete. You must call fillComplete on the "
373  "graph before using it to construct a CrsMatrix. Note that "
374  "calling resumeFill on the graph makes it not fill complete, "
375  "even if you had previously called fillComplete. In that "
376  "case, you must call fillComplete on the graph again.");
377 
378  // The graph is fill complete, so it is locally indexed and has a
379  // fixed structure. This means we can allocate the (1-D) array of
380  // values and build the local matrix right now. Note that the
381  // local matrix's number of columns comes from the column Map, not
382  // the domain Map.
383 
384  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
385  auto lclGraph = graph->getLocalGraph ();
386 
387  auto lclMat = std::make_shared<local_matrix_type>
388  ("Tpetra::CrsMatrix::lclMatrix_", numCols, values, lclGraph);
389  lclMatrix_ = std::make_shared<local_multiply_op_type> (lclMat);
390 
391  // FIXME (22 Jun 2016) I would very much like to get rid of
392  // k_values1D_ at some point. I find it confusing to have all
393  // these extra references lying around.
394  k_values1D_ = lclMat->values;
395 
397  }
398 
// Constructor taking row and column Maps plus the three CRS arrays
// (rowPointers, columnIndices, values) and optional params.
//
// Steps visible below:
//   1. Require values.extent(0) == columnIndices.extent(0)
//      (std::invalid_argument otherwise).
//   2. In debug mode only, and only if rowPointers is nonempty, require
//      the last rowPointers entry to equal both of those extents.
//   3. Build a crs_graph_type from (rowMap, colMap, rowPointers,
//      columnIndices, params); a std::exception from that constructor
//      is re-reported as std::runtime_error.
//   4. Check that the new graph's local graph has the expected
//      dimensions (std::logic_error otherwise).
//   5. Take ownership of the graph (myGraph_ and staticGraph_), build
//      the local matrix from 'values' and the local graph, and keep a
//      handle to the values in k_values1D_.
// Progress lines go to std::cerr when
// Details::Behavior::verbose("CrsMatrix") is true.
//
// NOTE(review): this listing skips its own lines 400 (presumably the
// "CrsMatrix<...>::" qualifier) and 513 (after the k_values1D_
// assignment) -- confirm against the real file.
399  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
401  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
402  const Teuchos::RCP<const map_type>& colMap,
403  const typename local_matrix_type::row_map_type& rowPointers,
404  const typename local_graph_type::entries_type::non_const_type& columnIndices,
405  const typename local_matrix_type::values_type& values,
406  const Teuchos::RCP<Teuchos::ParameterList>& params) :
407  dist_object_type (rowMap),
408  storageStatus_ (Details::STORAGE_1D_PACKED)
409  {
410  using Details::getEntryOnHost;
411  using Teuchos::RCP;
412  using std::endl;
413  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
414  "RCP<const Map>, ptr, ind, val[, params]): ";
415  const char suffix[] =
416  ". Please report this bug to the Tpetra developers.";
417  const bool debug = Details::Behavior::debug("CrsMatrix");
418  const bool verbose = Details::Behavior::verbose("CrsMatrix");
419 
// 'prefix' is only set (and only dereferenced) when verbose is true.
420  std::unique_ptr<std::string> prefix;
421  if (verbose) {
422  prefix = this->createPrefix(
423  "CrsMatrix", "CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
424  std::ostringstream os;
425  os << *prefix << "Start" << endl;
426  std::cerr << os.str ();
427  }
428 
429  // Check the user's input. Note that this might throw only on
430  // some processes but not others, causing deadlock. We prefer
431  // deadlock due to exceptions to segfaults, because users can
432  // catch exceptions.
433  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
434  (values.extent(0) != columnIndices.extent(0),
435  std::invalid_argument, "values.extent(0)=" << values.extent(0)
436  << " != columnIndices.extent(0) = " << columnIndices.extent(0)
437  << ".");
// Debug-only check: it reads the last rowPointers entry through
// getEntryOnHost and compares it with the two array lengths.
438  if (debug && rowPointers.extent(0) != 0) {
439  const size_t numEnt =
440  getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
441  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
442  (numEnt != size_t(columnIndices.extent(0)) ||
443  numEnt != size_t(values.extent(0)),
444  std::invalid_argument, "Last entry of rowPointers says that "
445  "the matrix has " << numEnt << " entr"
446  << (numEnt != 1 ? "ies" : "y") << ", but the dimensions of "
447  "columnIndices and values don't match this. "
448  "columnIndices.extent(0)=" << columnIndices.extent (0)
449  << " and values.extent(0)=" << values.extent (0) << ".");
450  }
451 
452  RCP<crs_graph_type> graph;
453  try {
454  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, rowPointers,
455  columnIndices, params));
456  }
457  catch (std::exception& e) {
458  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
459  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
460  "RCP<const Map>, ptr, ind[, params]) threw an exception: "
461  << e.what ());
462  }
463  // The newly created CrsGraph _must_ have a local graph at this
464  // point. We don't really care whether CrsGraph's constructor
465  // deep-copies or shallow-copies the input, but the dimensions
466  // have to be right. That's how we tell whether the CrsGraph has
467  // a local graph.
468  auto lclGraph = graph->getLocalGraph ();
469  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
470  (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
471  lclGraph.entries.extent (0) != columnIndices.extent (0),
472  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
473  "ind[, params]) did not set the local graph correctly." << suffix);
474  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
475  (lclGraph.entries.extent (0) != values.extent (0),
476  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
477  "params]) did not set the local graph correctly. "
478  "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
479  << " != values.extent(0) = " << values.extent (0) << suffix);
480 
481  // myGraph_ not null means that the matrix owns the graph. This
482  // is true because the column indices come in as nonconst,
483  // implying shared ownership.
484  myGraph_ = graph;
485  staticGraph_ = graph;
486 
487  // The graph may not be fill complete yet. However, it is locally
488  // indexed (since we have a column Map) and has a fixed structure
489  // (due to the input arrays). This means we can allocate the
490  // (1-D) array of values and build the local matrix right now.
491  // Note that the local matrix's number of columns comes from the
492  // column Map, not the domain Map.
493 
494  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
495 
496  auto lclMat = std::make_shared<local_matrix_type>
497  ("Tpetra::CrsMatrix::lclMatrix_", numCols, values, lclGraph);
498  lclMatrix_ = std::make_shared<local_multiply_op_type> (lclMat);
499 
// Sanity check that the local matrix kept as many values as were
// passed in.
500  auto newValues = lclMat->values;
501  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
502  (newValues.extent (0) != values.extent (0),
503  std::logic_error, "Local matrix's constructor did not set the "
504  "values correctly. newValues.extent(0) = " <<
505  newValues.extent (0) << " != values.extent(0) = " <<
506  values.extent (0) << suffix);
507 
508  // FIXME (22 Jun 2016) I would very much like to get rid of
509  // k_values1D_ at some point. I find it confusing to have all
510  // these extra references lying around.
511  this->k_values1D_ = newValues;
512 
514  if (verbose) {
515  std::ostringstream os;
516  os << *prefix << "Done" << endl;
517  std::cerr << os.str();
518  }
519  }
520 
// Constructor taking row and column Maps plus the three CRS arrays as
// Teuchos::ArrayRCP (ptr, ind, val) and optional params.
//
// Builds a crs_graph_type from (rowMap, colMap, ptr, ind, params) and
// takes ownership of it (myGraph_ and staticGraph_).  It then checks
// that the graph's local graph has row_map / entries lengths equal to
// ptr.size() / ind.size() (std::logic_error otherwise).  'val' is
// converted into a values_type view through getKokkosViewDeepCopy, the
// local matrix is built from that view, and a handle to the values is
// kept in k_values1D_.  A std::exception from the CrsGraph constructor
// is re-reported as std::runtime_error.  In the visible code the
// length of 'val' is not compared against 'ind'.
//
// NOTE(review): this listing skips its own lines 522 (presumably the
// "CrsMatrix<...>::" qualifier) and 590 (after the k_values1D_
// assignment) -- confirm against the real file.
521  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
523  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
524  const Teuchos::RCP<const map_type>& colMap,
525  const Teuchos::ArrayRCP<size_t>& ptr,
526  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
527  const Teuchos::ArrayRCP<Scalar>& val,
528  const Teuchos::RCP<Teuchos::ParameterList>& params) :
529  dist_object_type (rowMap),
530  storageStatus_ (Details::STORAGE_1D_PACKED)
531  {
532  using Kokkos::Compat::getKokkosViewDeepCopy;
533  using Teuchos::av_reinterpret_cast;
534  using Teuchos::RCP;
535  using values_type = typename local_matrix_type::values_type;
536  using IST = impl_scalar_type;
537  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
538  "RCP<const Map>, ptr, ind, val[, params]): ";
539 
540  RCP<crs_graph_type> graph;
541  try {
542  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, ptr,
543  ind, params));
544  }
545  catch (std::exception& e) {
546  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
547  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
548  "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
549  "RCP<ParameterList>]) threw an exception: " << e.what ());
550  }
551  // myGraph_ not null means that the matrix owns the graph. This
552  // is true because the column indices come in as nonconst,
553  // implying shared ownership.
554  myGraph_ = graph;
555  staticGraph_ = graph;
556 
557  // The graph may not be fill complete yet. However, it is locally
558  // indexed (since we have a column Map) and has a fixed structure
559  // (due to the input arrays). This means we can allocate the
560  // (1-D) array of values and build the local matrix right now.
561  // Note that the local matrix's number of columns comes from the
562  // column Map, not the domain Map.
563 
564  // The graph _must_ have a local graph at this point. We don't
565  // really care whether CrsGraph's constructor deep-copies or
566  // shallow-copies the input, but the dimensions have to be right.
567  // That's how we tell whether the CrsGraph has a local graph.
568  auto lclGraph = staticGraph_->getLocalGraph ();
569  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
570  (size_t (lclGraph.row_map.extent (0)) != size_t (ptr.size ()) ||
571  size_t (lclGraph.entries.extent (0)) != size_t (ind.size ()),
572  std::logic_error, "CrsGraph's constructor (rowMap, colMap, "
573  "ptr, ind[, params]) did not set the local graph correctly. "
574  "Please report this bug to the Tpetra developers.");
575 
576  const size_t numCols =
577  staticGraph_->getColMap ()->getNodeNumElements ();
// View 'val' as impl_scalar_type (IST) and hand it to
// getKokkosViewDeepCopy to obtain the values view.
578  values_type valIn =
579  getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
580 
581  auto lclMat = std::make_shared<local_matrix_type>
582  ("Tpetra::CrsMatrix::lclMatrix_", numCols, valIn, lclGraph);
583  lclMatrix_ = std::make_shared<local_multiply_op_type> (lclMat);
584 
585  // FIXME (22 Jun 2016) I would very much like to get rid of
586  // k_values1D_ at some point. I find it confusing to have all
587  // these extra references lying around.
588  this->k_values1D_ = lclMat->values;
589 
591  }
592 
// Constructor taking (rowMap, colMap, lclMatrix, params); the result
// is marked fill complete from the start (fillComplete_ is initialized
// to true).
//
// lclMatrix_ wraps a copy-constructed local_matrix_type and
// k_values1D_ is initialized from lclMatrix.values.  A crs_graph_type
// is built from (rowMap, colMap, lclMatrix.graph, params) and must
// come back fill complete (std::logic_error otherwise); the matrix
// takes ownership of it.  computeGlobalConstants() is called unless
// params is non-null and its "compute global constants" entry is
// false.  The final two checks assert that the matrix is not fill
// active and is fill complete.
//
// NOTE(review): this listing skips its own lines 594 (presumably the
// "CrsMatrix<...>::" qualifier) and 647 (just before the closing
// brace) -- confirm against the real file.
593  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
595  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
596  const Teuchos::RCP<const map_type>& colMap,
597  const local_matrix_type& lclMatrix,
598  const Teuchos::RCP<Teuchos::ParameterList>& params) :
599  dist_object_type (rowMap),
600  lclMatrix_ (std::make_shared<local_multiply_op_type>
601  (std::make_shared<local_matrix_type> (lclMatrix))),
602  k_values1D_ (lclMatrix.values),
603  storageStatus_ (Details::STORAGE_1D_PACKED),
604  fillComplete_ (true)
605  {
606  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
607  "RCP<const Map>, local_matrix_type[, RCP<ParameterList>]): ";
608  const char suffix[] =
609  " Please report this bug to the Tpetra developers.";
610 
611  Teuchos::RCP<crs_graph_type> graph;
612  try {
613  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
614  lclMatrix.graph, params));
615  }
616  catch (std::exception& e) {
617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
618  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
619  "RCP<const Map>, local_graph_type[, RCP<ParameterList>]) threw an "
620  "exception: " << e.what ());
621  }
622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
623  (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
624  "<const Map>, RCP<const Map>, local_graph_type[, RCP<ParameterList>]) "
625  "did not produce a fill-complete graph. Please report this bug to the "
626  "Tpetra developers.");
627  // myGraph_ not null means that the matrix owns the graph. This
628  // is true because the column indices come in as nonconst through
629  // the matrix, implying shared ownership.
630  myGraph_ = graph;
631  staticGraph_ = graph;
632 
// Defaults to true when params is null; otherwise the value comes
// from params->get with a default of true.
633  const bool callComputeGlobalConstants = params.get () == nullptr ||
634  params->get ("compute global constants", true);
635  if (callComputeGlobalConstants) {
636  this->computeGlobalConstants ();
637  }
638 
639  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
640  (isFillActive (), std::logic_error,
641  "At the end of a CrsMatrix constructor that should produce "
642  "a fillComplete matrix, isFillActive() is true." << suffix);
643  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
644  (! isFillComplete (), std::logic_error, "At the end of a "
645  "CrsMatrix constructor that should produce a fillComplete "
646  "matrix, isFillComplete() is false." << suffix);
648  }
649 
// Constructor taking (lclMatrix, rowMap, colMap, domainMap, rangeMap,
// params); the result is marked fill complete from the start
// (fillComplete_ is initialized to true).
//
// Same flow as the (rowMap, colMap, lclMatrix, params) constructor,
// but the graph is built from (lclMatrix.graph, rowMap, colMap,
// domainMap, rangeMap, params), so the caller supplies the domain and
// range Maps explicitly.  The graph must come back fill complete
// (std::logic_error otherwise).  computeGlobalConstants() is called
// unless params is non-null and its "compute global constants" entry
// is false.
//
// NOTE(review): this listing skips its own lines 651 (presumably the
// "CrsMatrix<...>::" qualifier) and 707 (just before the closing
// brace) -- confirm against the real file.
650  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
652  CrsMatrix (const local_matrix_type& lclMatrix,
653  const Teuchos::RCP<const map_type>& rowMap,
654  const Teuchos::RCP<const map_type>& colMap,
655  const Teuchos::RCP<const map_type>& domainMap,
656  const Teuchos::RCP<const map_type>& rangeMap,
657  const Teuchos::RCP<Teuchos::ParameterList>& params) :
658  dist_object_type (rowMap),
659  lclMatrix_ (std::make_shared<local_multiply_op_type>
660  (std::make_shared<local_matrix_type> (lclMatrix))),
661  k_values1D_ (lclMatrix.values),
662  storageStatus_ (Details::STORAGE_1D_PACKED),
663  fillComplete_ (true)
664  {
665  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
666  "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
667  "local_matrix_type[, RCP<ParameterList>]): ";
668  const char suffix[] =
669  " Please report this bug to the Tpetra developers.";
670 
671  Teuchos::RCP<crs_graph_type> graph;
672  try {
673  graph = Teuchos::rcp (new crs_graph_type (lclMatrix.graph, rowMap, colMap,
674  domainMap, rangeMap, params));
675  }
676  catch (std::exception& e) {
677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
678  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
679  "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_type[, "
680  "RCP<ParameterList>]) threw an exception: " << e.what ());
681  }
682  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
683  (! graph->isFillComplete (), std::logic_error, "CrsGraph "
684  "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
685  "RCP<const Map>, local_graph_type[, RCP<ParameterList>]) did "
686  "not produce a fillComplete graph." << suffix);
687  // myGraph_ not null means that the matrix owns the graph. This
688  // is true because the column indices come in as nonconst through
689  // the matrix, implying shared ownership.
690  myGraph_ = graph;
691  staticGraph_ = graph;
692 
// Defaults to true when params is null; otherwise the value comes
// from params->get with a default of true.
693  const bool callComputeGlobalConstants = params.get () == nullptr ||
694  params->get ("compute global constants", true);
695  if (callComputeGlobalConstants) {
696  this->computeGlobalConstants ();
697  }
698 
699  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
700  (isFillActive (), std::logic_error,
701  "At the end of a CrsMatrix constructor that should produce "
702  "a fillComplete matrix, isFillActive() is true." << suffix);
703  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
704  (! isFillComplete (), std::logic_error, "At the end of a "
705  "CrsMatrix constructor that should produce a fillComplete "
706  "matrix, isFillComplete() is false." << suffix);
708  }
709 
// Constructor taking (lclMatrix, rowMap, colMap, domainMap, rangeMap,
// importer, exporter, params); the result is marked fill complete from
// the start (fillComplete_ is initialized to true).
//
// Same flow as the constructor without importer/exporter, except that
// the caller's importer and exporter are forwarded to the
// crs_graph_type constructor along with the other arguments.  The
// graph must come back fill complete (std::logic_error otherwise).
// computeGlobalConstants() is called unless params is non-null and its
// "compute global constants" entry is false.
//
// NOTE(review): this listing skips its own lines 711 (presumably the
// "CrsMatrix<...>::" qualifier) and 770 (just before the closing
// brace) -- confirm against the real file.
710  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
712  CrsMatrix (const local_matrix_type& lclMatrix,
713  const Teuchos::RCP<const map_type>& rowMap,
714  const Teuchos::RCP<const map_type>& colMap,
715  const Teuchos::RCP<const map_type>& domainMap,
716  const Teuchos::RCP<const map_type>& rangeMap,
717  const Teuchos::RCP<const import_type>& importer,
718  const Teuchos::RCP<const export_type>& exporter,
719  const Teuchos::RCP<Teuchos::ParameterList>& params) :
720  dist_object_type (rowMap),
721  lclMatrix_ (std::make_shared<local_multiply_op_type>
722  (std::make_shared<local_matrix_type> (lclMatrix))),
723  k_values1D_ (lclMatrix.values),
724  storageStatus_ (Details::STORAGE_1D_PACKED),
725  fillComplete_ (true)
726  {
727  using Teuchos::rcp;
728  const char tfecfFuncName[] = "Tpetra::CrsMatrix"
729  "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
730  const char suffix[] =
731  " Please report this bug to the Tpetra developers.";
732 
733  Teuchos::RCP<crs_graph_type> graph;
734  try {
735  graph = rcp (new crs_graph_type (lclMatrix.graph, rowMap, colMap,
736  domainMap, rangeMap, importer,
737  exporter, params));
738  }
739  catch (std::exception& e) {
740  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
741  (true, std::runtime_error, "CrsGraph constructor "
742  "(local_graph_type, Map, Map, Map, Map, Import, Export, "
743  "params) threw: " << e.what ());
744  }
745  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
746  (!graph->isFillComplete (), std::logic_error, "CrsGraph "
747  "constructor (local_graph_type, Map, Map, Map, Map, Import, "
748  "Export, params) did not produce a fill-complete graph. "
749  "Please report this bug to the Tpetra developers.");
750  // myGraph_ not null means that the matrix owns the graph. This
751  // is true because the column indices come in as nonconst through
752  // the matrix, implying shared ownership.
753  myGraph_ = graph;
754  staticGraph_ = graph;
755 
// Defaults to true when params is null; otherwise the value comes
// from params->get with a default of true.
756  const bool callComputeGlobalConstants = params.get () == nullptr ||
757  params->get ("compute global constants", true);
758  if (callComputeGlobalConstants) {
759  this->computeGlobalConstants ();
760  }
761 
762  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
763  (isFillActive (), std::logic_error,
764  "At the end of a CrsMatrix constructor that should produce "
765  "a fillComplete matrix, isFillActive() is true." << suffix);
766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
767  (! isFillComplete (), std::logic_error, "At the end of a "
768  "CrsMatrix constructor that should produce a fillComplete "
769  "matrix, isFillComplete() is false." << suffix);
771  }
772 
// Constructor that builds a matrix from an existing matrix 'source',
// either copying or sharing its values according to copyOrView.
//
// It delegates to another CrsMatrix constructor, passing
// source.getCrsGraph () and source.getLocalValuesView (), and then
// requires source to be fill complete (std::invalid_argument
// otherwise).
//   - Teuchos::Copy: allocates a new, uninitialized values view of the
//     same length, deep-copies the source values into it, assigns it
//     to k_values1D_, and calls fillComplete with the source's domain
//     and range Maps.
//   - Teuchos::View: returns immediately, leaving the state set up by
//     the delegated constructor.
//   - anything else: reported as std::invalid_argument.
//
// NOTE(review): this listing skips its own lines 774-775, which
// presumably hold the "CrsMatrix<...>::" qualifier and the first
// parameter (the 'source' matrix) -- confirm against the real file.
773  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
776  const Teuchos::DataAccess copyOrView)
777  : CrsMatrix (source.getCrsGraph (), source.getLocalValuesView ())
778  {
779  const char tfecfFuncName[] = "Tpetra::CrsMatrix("
780  "const CrsMatrix&, const Teuchos::DataAccess): ";
781  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
782  (! source.isFillComplete (), std::invalid_argument,
783  "Source graph must be fillComplete().");
784 
785  if (copyOrView == Teuchos::Copy) {
786  using values_type = typename local_matrix_type::values_type;
787  values_type vals = source.getLocalValuesView ();
788  using Kokkos::view_alloc;
789  using Kokkos::WithoutInitializing;
// Skip initialization of the new view; the deep_copy below overwrites
// every element.
790  values_type newvals (view_alloc ("val", WithoutInitializing),
791  vals.extent (0));
792  Kokkos::deep_copy (newvals, vals);
793  k_values1D_ = newvals;
// NOTE(review): assuming the check above throws when source is not
// fill complete, this condition is always true when reached.
794  if (source.isFillComplete ()) {
795  fillComplete (source.getDomainMap (), source.getRangeMap ());
796  }
797  }
798  else if (copyOrView == Teuchos::View) {
799  return;
800  }
801  else {
802  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
803  (true, std::invalid_argument, "Second argument 'copyOrView' "
804  "has an invalid value " << copyOrView << ". Valid values "
805  "include Teuchos::Copy = " << Teuchos::Copy << " and "
806  "Teuchos::View = " << Teuchos::View << ".");
807  }
808  }
809 
// Exchanges the data members listed below between *this and
// 'crs_matrix', one std::swap per member.  Only these ten members are
// swapped in the visible code.
//
// NOTE(review): this listing skips its own lines 812-813, which
// presumably hold the "CrsMatrix<...>::" qualifier and the function
// name with its 'crs_matrix' parameter -- confirm against the real
// file.
//
// NOTE(review): the trailing comment on the lclMatrix_ line describes
// a KokkosSparse::CrsMatrix, but the constructors in this file assign
// lclMatrix_ from std::make_shared<local_multiply_op_type>; that
// trailing type comment may be stale -- confirm.
810  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
811  void
814  {
815  std::swap(crs_matrix.importMV_, this->importMV_); // mutable Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
816  std::swap(crs_matrix.exportMV_, this->exportMV_); // mutable Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
817  std::swap(crs_matrix.staticGraph_, this->staticGraph_); // Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>
818  std::swap(crs_matrix.myGraph_, this->myGraph_); // Teuchos::RCP< CrsGraph<LocalOrdinal, GlobalOrdinal, Node>>
819  std::swap(crs_matrix.lclMatrix_, this->lclMatrix_); // KokkosSparse::CrsMatrix<impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type>
820  std::swap(crs_matrix.k_values1D_, this->k_values1D_); // KokkosSparse::CrsMatrix<impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type>::values_type
821  std::swap(crs_matrix.storageStatus_, this->storageStatus_); // ::Tpetra::Details::EStorageStatus (enum f/m Tpetra_CrsGraph_decl.hpp)
822  std::swap(crs_matrix.fillComplete_, this->fillComplete_); // bool
823  std::swap(crs_matrix.nonlocals_, this->nonlocals_); // std::map<GO, pair<Teuchos::Array<GO>,Teuchos::Array<Scalar>>
824  std::swap(crs_matrix.frobNorm_, this->frobNorm_); // mutable Kokkos::Details::ArithTraits<impl_scalar_type>::mag_type
825  }
826 
// getComm: returns the communicator reported by the matrix's graph
// (getCrsGraphRef().getComm()).
827  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
828  Teuchos::RCP<const Teuchos::Comm<int> >
830  getComm () const {
831  return getCrsGraphRef ().getComm ();
832  }
833 
// getProfileType: forwards to the graph's getProfileType().
// NOTE(review): the return-type line is absent from this listing.
834  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
837  getProfileType () const {
838  return this->getCrsGraphRef ().getProfileType ();
839  }
840 
// isFillComplete: returns the matrix's own fillComplete_ flag.
841  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
842  bool
844  isFillComplete () const {
845  return fillComplete_;
846  }
847 
// isFillActive: the exact negation of fillComplete_, so it is always the
// opposite of isFillComplete().
848  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
849  bool
851  isFillActive () const {
852  return ! fillComplete_;
853  }
854 
// Forwards to the graph's isStorageOptimized().
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::isStorageOptimized — confirm against the header.
855  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
856  bool
859  return this->getCrsGraphRef ().isStorageOptimized ();
860  }
861 
// Forwards to the graph's isLocallyIndexed().
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::isLocallyIndexed — confirm against the header.
862  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
863  bool
866  return getCrsGraphRef ().isLocallyIndexed ();
867  }
868 
// Forwards to the graph's isGloballyIndexed().
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::isGloballyIndexed — confirm against the header.
869  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
870  bool
873  return getCrsGraphRef ().isGloballyIndexed ();
874  }
875 
// hasColMap: forwards to the graph's hasColMap().
876  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
877  bool
879  hasColMap () const {
880  return getCrsGraphRef ().hasColMap ();
881  }
882 
// Forwards to the graph's getGlobalNumEntries().
// NOTE(review): the return-type and name lines are absent from this listing;
// presumably this is CrsMatrix::getGlobalNumEntries — confirm.
883  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
887  return getCrsGraphRef ().getGlobalNumEntries ();
888  }
889 
// Forwards to the graph's getNodeNumEntries().
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::getNodeNumEntries — confirm.
890  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
891  size_t
894  return getCrsGraphRef ().getNodeNumEntries ();
895  }
896 
// Forwards to the graph's getGlobalNumRows().
// NOTE(review): the return-type and name lines are absent from this listing;
// presumably this is CrsMatrix::getGlobalNumRows — confirm.
897  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
901  return getCrsGraphRef ().getGlobalNumRows ();
902  }
903 
// Forwards to the graph's getGlobalNumCols().
// NOTE(review): the return-type and name lines are absent from this listing;
// presumably this is CrsMatrix::getGlobalNumCols — confirm.
904  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
908  return getCrsGraphRef ().getGlobalNumCols ();
909  }
910 
// getNodeNumRows: forwards to the graph's getNodeNumRows().
911  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
912  size_t
914  getNodeNumRows () const {
915  return getCrsGraphRef ().getNodeNumRows ();
916  }
917 
// getNodeNumCols: forwards to the graph's getNodeNumCols().
918  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
919  size_t
921  getNodeNumCols () const {
922  return getCrsGraphRef ().getNodeNumCols ();
923  }
924 
925 
// getNumEntriesInGlobalRow: forwards globalRow unchanged to the graph's
// getNumEntriesInGlobalRow() and returns its result.
926  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
927  size_t
929  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
930  return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
931  }
932 
// getNumEntriesInLocalRow: forwards localRow unchanged to the graph's
// getNumEntriesInLocalRow() and returns its result.
933  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
934  size_t
936  getNumEntriesInLocalRow (LocalOrdinal localRow) const {
937  return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
938  }
939 
// Forwards to the graph's getGlobalMaxNumRowEntries().
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::getGlobalMaxNumRowEntries — confirm.
940  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
941  size_t
944  return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
945  }
946 
// Forwards to the graph's getNodeMaxNumRowEntries().
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::getNodeMaxNumRowEntries — confirm.
947  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
948  size_t
951  return getCrsGraphRef ().getNodeMaxNumRowEntries ();
952  }
953 
// getIndexBase: returns the index base of the row Map
// (getRowMap()->getIndexBase()).
954  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
955  GlobalOrdinal
957  getIndexBase () const {
958  return getRowMap ()->getIndexBase ();
959  }
960 
// getRowMap: returns the graph's row Map.
961  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
962  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
964  getRowMap () const {
965  return getCrsGraphRef ().getRowMap ();
966  }
967 
// getColMap: returns the graph's column Map.
968  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
969  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
971  getColMap () const {
972  return getCrsGraphRef ().getColMap ();
973  }
974 
// getDomainMap: returns the graph's domain Map.
975  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
976  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
978  getDomainMap () const {
979  return getCrsGraphRef ().getDomainMap ();
980  }
981 
// getRangeMap: returns the graph's range Map.
982  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
983  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
985  getRangeMap () const {
986  return getCrsGraphRef ().getRangeMap ();
987  }
988 
// getGraph: returns the matrix's graph as an RCP<const RowGraph>.
// staticGraph_ is preferred when it is non-null; otherwise myGraph_ is
// returned (which may itself be null — no check is made here).
989  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
990  Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
992  getGraph () const {
993  if (staticGraph_ != Teuchos::null) {
994  return staticGraph_;
995  }
996  return myGraph_;
997  }
998 
// getCrsGraph: same selection as getGraph(), but typed as
// RCP<const CrsGraph>. staticGraph_ is preferred when it is non-null;
// otherwise myGraph_ is returned (which may itself be null — no check is
// made here).
999  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1000  Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
1002  getCrsGraph () const {
1003  if (staticGraph_ != Teuchos::null) {
1004  return staticGraph_;
1005  }
1006  return myGraph_;
1007  }
1008 
// getCrsGraphRef: returns the matrix's graph by dereferencing
// staticGraph_ when it is non-null, otherwise myGraph_.
// NOTE(review): the return-type line is absent from this listing.
//
// Only when HAVE_TPETRA_DEBUG is defined is a null myGraph_ tested (as
// std::logic_error) before it is dereferenced; without that macro the
// dereference is unchecked.
1009  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1012  getCrsGraphRef () const
1013  {
1014 #ifdef HAVE_TPETRA_DEBUG
1015  constexpr bool debug = true;
1016 #else
1017  constexpr bool debug = false;
1018 #endif // HAVE_TPETRA_DEBUG
1019 
1020  if (! this->staticGraph_.is_null ()) {
1021  return * (this->staticGraph_);
1022  }
1023  else {
1024  if (debug) {
1025  const char tfecfFuncName[] = "getCrsGraphRef: ";
1026  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1027  (this->myGraph_.is_null (), std::logic_error,
1028  "Both staticGraph_ and myGraph_ are null. "
1029  "Please report this bug to the Tpetra developers.");
1030  }
1031  return * (this->myGraph_);
1032  }
1033  }
1034 
// Returns the local matrix by value: a default-constructed
// local_matrix_type when lclMatrix_ holds no object, otherwise the result of
// lclMatrix_->getLocalMatrix().
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::getLocalMatrix — confirm.
1035  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1036  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_type
1039  {
1040  return lclMatrix_.get () == nullptr ?
1041  local_matrix_type () :
1042  lclMatrix_->getLocalMatrix ();
1043  }
1044 
// Returns a copy of the lclMatrix_ shared pointer (a
// std::shared_ptr<local_multiply_op_type>); no null check is made.
// NOTE(review): the name line is absent from this listing; presumably this
// is CrsMatrix::getLocalMultiplyOperator — confirm.
1045  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1046  std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1049  {
1050  return lclMatrix_;
1051  }
1052 
// isStaticGraph: true exactly when myGraph_ is null.
1053  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1054  bool
1056  isStaticGraph () const {
1057  return myGraph_.is_null ();
1058  }
1059 
// Capability predicate that unconditionally returns true.
// NOTE(review): the name line (listing lines 1062-1063) is absent from this
// listing, so which predicate this is cannot be determined here — confirm
// against the header.
1060  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1061  bool
1064  return true;
1065  }
1066 
// Capability predicate that unconditionally returns true.
// NOTE(review): the name line (listing lines 1069-1070) is absent from this
// listing, so which predicate this is cannot be determined here — confirm
// against the header.
1067  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1068  bool
1071  return true;
1072  }
1073 
// allocateValues: allocates this matrix's 1-D values array (k_values1D_),
// first allocating the graph's indices when the caller says they are not
// yet allocated.
//
// lg      - passed through to myGraph_->allocateIndices; in this method it
//           otherwise only selects "Local"/"Global" in the verbose message.
// gas     - GraphNotYetAllocated makes this method call
//           myGraph_->allocateIndices (lg, verbose) before sizing the values.
// verbose - when true, progress lines are written to std::cerr.
//
// The new k_values1D_ has as many entries as the last row offset of
// staticGraph_->k_rowPtrs_ (entry lclNumRows). The consistency checks guarded
// by Behavior::debug("CrsMatrix") use std::logic_error; anything thrown by
// allocateIndices is re-reported as std::runtime_error.
1074  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1075  void
1077  allocateValues (ELocalGlobal lg, GraphAllocationStatus gas,
1078  const bool verbose)
1079  {
1080  using Details::Behavior;
1082  using std::endl;
1083  const char tfecfFuncName[] = "allocateValues: ";
1084  const char suffix[] =
1085  " Please report this bug to the Tpetra developers.";
1086  ProfilingRegion region("Tpetra::CrsMatrix::allocateValues");
1087 
      // prefix is only created (and only dereferenced) when verbose is true.
1088  std::unique_ptr<std::string> prefix;
1089  if (verbose) {
1090  prefix = this->createPrefix("CrsMatrix", "allocateValues");
1091  std::ostringstream os;
1092  os << *prefix << "lg: "
1093  << (lg == LocalIndices ? "Local" : "Global") << "Indices"
1094  << ", gas: Graph"
1095  << (gas == GraphAlreadyAllocated ? "Already" : "NotYet")
1096  << "Allocated" << endl;
1097  std::cerr << os.str();
1098  }
1099 
1100  const bool debug = Behavior::debug("CrsMatrix");
1101  if (debug) {
1102  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1103  (this->staticGraph_.is_null (), std::logic_error,
1104  "staticGraph_ is null." << suffix);
1105 
1106  // If the graph indices are already allocated, then gas should be
1107  // GraphAlreadyAllocated. Otherwise, gas should be
1108  // GraphNotYetAllocated.
1109  if ((gas == GraphAlreadyAllocated) !=
1110  staticGraph_->indicesAreAllocated ()) {
      // The three fragments below are combined, with "not " inserted in
      // different places, to build the two possible mismatch messages.
1111  const char err1[] = "The caller has asserted that the graph "
1112  "is ";
1113  const char err2[] = "already allocated, but the static graph "
1114  "says that its indices are ";
1115  const char err3[] = "already allocated. ";
1116  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1117  (gas == GraphAlreadyAllocated &&
1118  ! staticGraph_->indicesAreAllocated (), std::logic_error,
1119  err1 << err2 << "not " << err3 << suffix);
1120  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1121  (gas != GraphAlreadyAllocated &&
1122  staticGraph_->indicesAreAllocated (), std::logic_error,
1123  err1 << "not " << err2 << err3 << suffix);
1124  }
1125 
1126  // If the graph is unallocated, then it had better be a
1127  // matrix-owned graph. ("Matrix-owned graph" means that the
1128  // matrix gets to define the graph structure. If the CrsMatrix
1129  // constructor that takes an RCP<const CrsGraph> was used, then
1130  // the matrix does _not_ own the graph.)
1131  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1132  (! this->staticGraph_->indicesAreAllocated () &&
1133  this->myGraph_.is_null (), std::logic_error,
1134  "The static graph says that its indices are not allocated, "
1135  "but the graph is not owned by the matrix." << suffix);
1136  }
1137 
1138  if (gas == GraphNotYetAllocated) {
1139  if (debug) {
1140  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1141  (this->myGraph_.is_null (), std::logic_error,
1142  "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1143  }
      // Whatever allocateIndices throws is converted to std::runtime_error
      // carrying this method's name prefix.
1144  try {
1145  this->myGraph_->allocateIndices (lg, verbose);
1146  }
1147  catch (std::exception& e) {
1148  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1149  (true, std::runtime_error, "CrsGraph::allocateIndices "
1150  "threw an exception: " << e.what ());
1151  }
1152  catch (...) {
1153  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1154  (true, std::runtime_error, "CrsGraph::allocateIndices "
1155  "threw an exception not a subclass of std::exception.");
1156  }
1157  }
1158 
1159  // Allocate matrix values.
1160  // "Static profile" means that the number of matrix entries in
1161  // each row was fixed at the time the CrsMatrix constructor was
1162  // called. This lets us use 1-D storage for the matrix's
1163  // values. ("1-D storage" means the same as that used by the
1164  // three arrays in the compressed sparse row storage format.)
1165 
1166  if (debug) {
1167  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1168  (this->staticGraph_.is_null (), std::logic_error,
1169  "this->getProfileType() == StaticProfile, but staticGraph_ "
1170  "is null." << suffix);
1171  }
1172 
      // NOTE(review): staticGraph_ is dereferenced here whether or not debug
      // is on; the null checks above only run in debug mode.
1173  const size_t lclNumRows = this->staticGraph_->getNodeNumRows ();
1174  typename Graph::local_graph_type::row_map_type k_ptrs =
1175  this->staticGraph_->k_rowPtrs_;
1176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177  (k_ptrs.extent (0) != lclNumRows+1, std::logic_error,
1178  "With StaticProfile, row offsets array has length "
1179  << k_ptrs.extent (0) << " != (lclNumRows+1) = "
1180  << (lclNumRows+1) << ".");
1181 
      // The last row offset is used as the number of values to allocate.
1182  const size_t lclTotalNumEntries =
1183  ::Tpetra::Details::getEntryOnHost (k_ptrs, lclNumRows);
1184 
1185  // Allocate the 1-D array of matrix values.
1186  using values_type = typename local_matrix_type::values_type;
1187  if (verbose) {
1188  std::ostringstream os;
1189  os << *prefix << "Allocate k_values1D_: Pre "
1190  << k_values1D_.extent(0) << ", post "
1191  << lclTotalNumEntries << endl;
1192  std::cerr << os.str();
1193  }
1194  this->k_values1D_ =
1195  values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1196  }
1197 
// getAllValues: fills the three output arguments from the matrix's storage.
//
// rowPointers   - set to the graph's getNodeRowPtrs().
// columnIndices - set to the graph's getNodePackedIndices().
// values        - set to a persisting view of k_values1D_, reinterpreted
//                 from impl_scalar_type to Scalar.
//
// Tested as std::runtime_error: mismatched incoming sizes (see note below), a
// null graph, or a std::exception thrown by either graph accessor.
1198  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1199  void
1201  getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
1202  Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
1203  Teuchos::ArrayRCP<const Scalar>& values) const
1204  {
1205  using Teuchos::RCP;
1206  const char tfecfFuncName[] = "getAllValues: ";
      // NOTE(review): this compares the sizes of columnIndices and values as
      // they were passed in, i.e. before this method assigns either of them.
1207  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1208  columnIndices.size () != values.size (), std::runtime_error,
1209  "Requires that columnIndices and values are the same size.");
1210 
1211  RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
1212  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1213  relevantGraph.is_null (), std::runtime_error,
1214  "Requires that getCrsGraph() is not null.");
1215  try {
1216  rowPointers = relevantGraph->getNodeRowPtrs ();
1217  }
1218  catch (std::exception &e) {
1219  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1220  true, std::runtime_error,
1221  "Caught exception while calling graph->getNodeRowPtrs(): "
1222  << e.what ());
1223  }
1224  try {
1225  columnIndices = relevantGraph->getNodePackedIndices ();
1226  }
1227  catch (std::exception &e) {
1228  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1229  true, std::runtime_error,
1230  "Caught exception while calling graph->getNodePackedIndices(): "
1231  << e.what ());
1232  }
      // The values are exposed under the public Scalar type rather than the
      // internal impl_scalar_type.
1233  Teuchos::ArrayRCP<const impl_scalar_type> vals =
1234  Kokkos::Compat::persistingView (k_values1D_);
1235  values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
1236  }
1237 
// fillLocalGraphAndMatrix: builds the local graph (myGraph_->lclGraph_) and
// the local matrix (lclMatrix_) from the matrix-owned graph myGraph_ and the
// values in k_values1D_. Per tfecfFuncName it is called from fillComplete or
// expertStaticFillComplete.
//
// params - may be null; when non-null its "Optimize Storage" entry is read
//          (via get, with a computed default) to decide whether the packed
//          arrays replace the graph's and matrix's current arrays.
//
// Outline:
//  1. If the graph's entry count differs from its allocation size, compute
//     packed row offsets from the per-row entry counts and pack the column
//     indices and values into newly allocated arrays; otherwise reuse the
//     existing arrays as they are.
//  2. If optimized storage is requested, install the packed arrays in
//     myGraph_ / this, clear k_numRowEntries_, and mark both as
//     STORAGE_1D_PACKED.
//  3. Construct the local graph and the local matrix from the chosen arrays.
//
// The consistency checks guarded by Behavior::debug("CrsMatrix") use
// std::logic_error.
1238  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1239  void
1241  fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1242  {
1244  using ::Tpetra::Details::getEntryOnHost;
1245  using Kokkos::create_mirror_view;
1246  using Teuchos::arcp_const_cast;
1247  using Teuchos::Array;
1248  using Teuchos::ArrayRCP;
1249  using Teuchos::null;
1250  using Teuchos::RCP;
1251  using Teuchos::rcp;
1252  using std::endl;
1253  using row_map_type = typename local_matrix_type::row_map_type;
1254  using lclinds_1d_type = typename Graph::local_graph_type::entries_type::non_const_type;
1255  using values_type = typename local_matrix_type::values_type;
      // NOTE(review): the region label names CrsGraph although this method's
      // own prefix and messages identify it as CrsMatrix code — confirm the
      // label is intended.
1256  Details::ProfilingRegion regionFLGAM
1257  ("Tpetra::CrsGraph::fillLocalGraphAndMatrix");
1258 
1259  const char tfecfFuncName[] = "fillLocalGraphAndMatrix (called from "
1260  "fillComplete or expertStaticFillComplete): ";
1261  const char suffix[] =
1262  " Please report this bug to the Tpetra developers.";
1263  const bool debug = Details::Behavior::debug("CrsMatrix");
1264  const bool verbose = Details::Behavior::verbose("CrsMatrix");
1265 
      // prefix is only created (and only dereferenced) when verbose is true.
1266  std::unique_ptr<std::string> prefix;
1267  if (verbose) {
1268  prefix = this->createPrefix("CrsMatrix", "fillLocalGraphAndMatrix");
1269  std::ostringstream os;
1270  os << *prefix << endl;
1271  std::cerr << os.str ();
1272  }
1273 
1274  if (debug) {
1275  // fillComplete() only calls fillLocalGraphAndMatrix() if the
1276  // matrix owns the graph, which means myGraph_ is not null.
1277  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1278  (myGraph_.is_null (), std::logic_error, "The nonconst graph "
1279  "(myGraph_) is null. This means that the matrix has a "
1280  "const (a.k.a. \"static\") graph. fillComplete or "
1281  "expertStaticFillComplete should never call "
1282  "fillLocalGraphAndMatrix in that case." << suffix);
1283  }
1284 
1285  const size_t lclNumRows = this->getNodeNumRows ();
1286 
1287  // This method's goal is to fill in the three arrays (compressed
1288  // sparse row format) that define the sparse graph's and matrix's
1289  // structure, and the sparse matrix's values.
1290  //
1291  // Use the nonconst version of row_map_type for k_ptrs,
1292  // because row_map_type is const and we need to modify k_ptrs here.
1293  typename row_map_type::non_const_type k_ptrs;
1294  row_map_type k_ptrs_const;
1295  lclinds_1d_type k_inds;
1296  values_type k_vals;
1297 
1298  // Get references to the data in myGraph_, so we can modify them
1299  // as well. Note that we only call fillLocalGraphAndMatrix() if
1300  // the matrix owns the graph, which means myGraph_ is not null.
      // NOTE(review): this local is not referenced again in the lines visible
      // here; later code reads myGraph_->k_lclInds1D_ directly.
1301  lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_;
1302 
1303  typedef decltype (myGraph_->k_numRowEntries_) row_entries_type;
1304 
1305  // StaticProfile means that the matrix's column indices and
1306  // values are currently stored in a 1-D format, with row offsets
1307  // in k_rowPtrs_ and local column indices in k_lclInds1D_.
1308 
1309  // StaticProfile also means that the graph's array of row
1310  // offsets must already be allocated.
1311  typename Graph::local_graph_type::row_map_type curRowOffsets =
1312  myGraph_->k_rowPtrs_;
1313 
1314  if (debug) {
1315  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1316  (curRowOffsets.extent (0) == 0, std::logic_error,
1317  "(StaticProfile branch) curRowOffsets.extent(0) == 0.");
1318  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1319  (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1320  "(StaticProfile branch) curRowOffsets.extent(0) = "
1321  << curRowOffsets.extent (0) << " != lclNumRows + 1 = "
1322  << (lclNumRows + 1) << ".");
1323  const size_t numOffsets = curRowOffsets.extent (0);
1324  const auto valToCheck =
1325  getEntryOnHost (curRowOffsets, numOffsets - 1);
      // NOTE(review): the message below prints "curRowOffsets(numOffsets)",
      // but valToCheck was read at index numOffsets - 1.
1326  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1327  (numOffsets != 0 &&
1328  myGraph_->k_lclInds1D_.extent (0) != valToCheck,
1329  std::logic_error, "(StaticProfile branch) numOffsets = " <<
1330  numOffsets << " != 0 and myGraph_->k_lclInds1D_.extent(0) = "
1331  << myGraph_->k_lclInds1D_.extent (0) << " != curRowOffsets("
1332  << numOffsets << ") = " << valToCheck << ".");
1333  }
1334 
      // Fewer valid entries than allocated slots means the 1-D storage still
      // has gaps and must be packed.
1335  if (myGraph_->getNodeNumEntries() !=
1336  myGraph_->getNodeAllocationSize()) {
1337  if (verbose) {
1338  std::ostringstream os;
1339  const auto numEnt = myGraph_->getNodeNumEntries();
1340  const auto allocSize = myGraph_->getNodeAllocationSize();
1341  os << *prefix << "Unpacked 1-D storage: numEnt=" << numEnt
1342  << ", allocSize=" << allocSize << endl;
1343  std::cerr << os.str ();
1344  }
1345  // The matrix's current 1-D storage is "unpacked." This means
1346  // the row offsets may differ from what the final row offsets
1347  // should be. This could happen, for example, if the user
1348  // specified StaticProfile in the constructor and set an upper
1349  // bound on the number of entries per row, but didn't fill all
1350  // those entries.
1351  if (debug && curRowOffsets.extent (0) != 0) {
1352  const size_t numOffsets =
1353  static_cast<size_t> (curRowOffsets.extent (0));
1354  const auto valToCheck =
1355  getEntryOnHost (curRowOffsets, numOffsets - 1);
1356  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1357  (static_cast<size_t> (valToCheck) !=
1358  static_cast<size_t> (k_values1D_.extent (0)),
1359  std::logic_error, "(StaticProfile unpacked branch) Before "
1360  "allocating or packing, curRowOffsets(" << (numOffsets-1)
1361  << ") = " << valToCheck << " != k_values1D_.extent(0)"
1362  " = " << k_values1D_.extent (0) << ".");
1363  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1364  (static_cast<size_t> (valToCheck) !=
1365  static_cast<size_t> (myGraph_->k_lclInds1D_.extent (0)),
1366  std::logic_error, "(StaticProfile unpacked branch) Before "
1367  "allocating or packing, curRowOffsets(" << (numOffsets-1)
1368  << ") = " << valToCheck
1369  << " != myGraph_->k_lclInds1D_.extent(0) = "
1370  << myGraph_->k_lclInds1D_.extent (0) << ".");
1371  }
1372  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1373  // the array of valid entry counts per row.
1374 
1375  // Total number of entries in the matrix on the calling
1376  // process. We will compute this in the loop below. It's
1377  // cheap to compute and useful as a sanity check.
1378  size_t lclTotalNumEntries = 0;
1379  // This will be a host view of packed row offsets.
      // NOTE(review): h_ptrs is not referenced again in the lines visible
      // here.
1380  typename row_map_type::non_const_type::HostMirror h_ptrs;
1381  {
1382  // Allocate the packed row offsets array. We use a nonconst
1383  // temporary (packedRowOffsets) here, because k_ptrs is
1384  // const. We will assign packedRowOffsets to k_ptrs below.
      // NOTE(review): k_ptrs is declared above with
      // row_map_type::non_const_type, so the "k_ptrs is const" wording in
      // the comments of this scope appears outdated; k_ptrs_const is the
      // const view.
1385  if (verbose) {
1386  std::ostringstream os;
1387  os << *prefix << "Allocate packed row offsets: "
1388  << (lclNumRows+1) << endl;
1389  std::cerr << os.str ();
1390  }
1391  typename row_map_type::non_const_type
1392  packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
1393  typename row_entries_type::const_type numRowEnt_h =
1394  myGraph_->k_numRowEntries_;
1395  // We're computing offsets on device. This function can
1396  // handle numRowEnt_h being a host View.
1397  lclTotalNumEntries =
1398  computeOffsetsFromCounts (packedRowOffsets, numRowEnt_h);
1399  // packedRowOffsets is modifiable; k_ptrs isn't, so we have
1400  // to use packedRowOffsets in the loop above and assign here.
1401  k_ptrs = packedRowOffsets;
1402  k_ptrs_const = k_ptrs;
1403  }
1404 
1405  if (debug) {
1406  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1407  (static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1408  std::logic_error,
1409  "(StaticProfile unpacked branch) After packing k_ptrs, "
1410  "k_ptrs.extent(0) = " << k_ptrs.extent (0) << " != "
1411  "lclNumRows+1 = " << (lclNumRows+1) << ".");
1412  const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1413  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1414  (valToCheck != lclTotalNumEntries, std::logic_error,
1415  "(StaticProfile unpacked branch) After filling k_ptrs, "
1416  "k_ptrs(lclNumRows=" << lclNumRows << ") = " << valToCheck
1417  << " != total number of entries on the calling process = "
1418  << lclTotalNumEntries << ".");
1419  }
1420 
1421  // Allocate the arrays of packed column indices and values.
1422  if (verbose) {
1423  std::ostringstream os;
1424  os << *prefix << "Allocate packed local column indices: "
1425  << lclTotalNumEntries << endl;
1426  std::cerr << os.str ();
1427  }
1428  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1429  if (verbose) {
1430  std::ostringstream os;
1431  os << *prefix << "Allocate packed values: "
1432  << lclTotalNumEntries << endl;
1433  std::cerr << os.str ();
1434  }
1435  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1436 
1437  // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_,
1438  // and k_values1D_ are currently unpacked. Pack them, using
1439  // the packed row offsets array k_ptrs that we created above.
1440  //
1441  // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1442  // need to keep around the unpacked row offsets, column
1443  // indices, and values arrays.
1444 
1445  // Pack the column indices from unpacked k_lclInds1D_ into
1446  // packed k_inds. We will replace k_lclInds1D_ below.
1447  using inds_packer_type = pack_functor<
1448  typename Graph::local_graph_type::entries_type::non_const_type,
1449  typename Graph::local_graph_type::row_map_type>;
1450  inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_,
1451  k_ptrs, curRowOffsets);
1452  using exec_space = typename decltype (k_inds)::execution_space;
1453  using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1454  Kokkos::parallel_for
1455  ("Tpetra::CrsMatrix pack column indices",
1456  range_type (0, lclNumRows), indsPacker);
1457 
1458  // Pack the values from unpacked k_values1D_ into packed
1459  // k_vals. We will replace k_values1D_ below.
1460  using vals_packer_type = pack_functor<values_type, row_map_type>;
1461  vals_packer_type valsPacker (k_vals, this->k_values1D_,
1462  k_ptrs, curRowOffsets);
1463  Kokkos::parallel_for ("Tpetra::CrsMatrix pack values",
1464  range_type (0, lclNumRows), valsPacker);
1465 
1466  if (debug) {
1467  const char myPrefix[] = "(StaticProfile \"Optimize Storage\""
1468  "=true branch) After packing, ";
1469  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1470  (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1471  << "k_ptrs.extent(0) = 0. This probably means that "
1472  "k_rowPtrs_ was never allocated.");
1473  if (k_ptrs.extent (0) != 0) {
1474  const size_t numOffsets (k_ptrs.extent (0));
1475  const auto valToCheck =
1476  getEntryOnHost (k_ptrs, numOffsets - 1);
1477  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1478  (size_t (valToCheck) != k_vals.extent (0),
1479  std::logic_error, myPrefix <<
1480  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1481  " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1482  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1483  (size_t (valToCheck) != k_inds.extent (0),
1484  std::logic_error, myPrefix <<
1485  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1486  " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1487  }
1488  }
1489  }
1490  else { // We don't have to pack, so just set the pointers.
1491  if (verbose) {
1492  std::ostringstream os;
1493  os << *prefix << "Storage already packed: k_rowPtrs_: "
1494  << myGraph_->k_rowPtrs_.extent(0) << ", k_lclInds1D_: "
1495  << myGraph_->k_lclInds1D_.extent(0) << ", k_values1D_: "
1496  << k_values1D_.extent(0) << endl;
1497  std::cerr << os.str();
1498  }
      // Only k_ptrs_const is set in this branch; the nonconst k_ptrs keeps
      // its default-constructed value.
1499  k_ptrs_const = myGraph_->k_rowPtrs_;
1500  k_inds = myGraph_->k_lclInds1D_;
1501  k_vals = this->k_values1D_;
1502 
1503  if (debug) {
1504  const char myPrefix[] =
1505  "(StaticProfile \"Optimize Storage\"=false branch) ";
1506  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1507  (k_ptrs_const.extent (0) == 0, std::logic_error, myPrefix
1508  << "k_ptrs_const.extent(0) = 0. This probably means "
1509  "that k_rowPtrs_ was never allocated.");
1510  if (k_ptrs_const.extent (0) != 0) {
1511  const size_t numOffsets (k_ptrs_const.extent (0));
1512  const auto valToCheck =
1513  getEntryOnHost (k_ptrs_const, numOffsets - 1);
1514  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1515  (size_t (valToCheck) != k_vals.extent (0),
1516  std::logic_error, myPrefix <<
1517  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1518  << " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1519  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1520  (size_t (valToCheck) != k_inds.extent (0),
1521  std::logic_error, myPrefix <<
1522  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1523  << " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1524  }
1525  }
1526  }
1527 
      // Final cross-check, run for both branches above: the row offsets,
      // values and column indices must agree in size.
1528  if (debug) {
1529  const char myPrefix[] = "After packing, ";
1530  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1531  (size_t (k_ptrs_const.extent (0)) != size_t (lclNumRows + 1),
1532  std::logic_error, myPrefix << "k_ptrs_const.extent(0) = "
1533  << k_ptrs_const.extent (0) << " != lclNumRows+1 = " <<
1534  (lclNumRows+1) << ".");
1535  if (k_ptrs_const.extent (0) != 0) {
1536  const size_t numOffsets (k_ptrs_const.extent (0));
1537  const size_t k_ptrs_const_numOffsetsMinus1 =
1538  getEntryOnHost (k_ptrs_const, numOffsets - 1);
1539  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1540  (k_ptrs_const_numOffsetsMinus1 != size_t (k_vals.extent (0)),
1541  std::logic_error, myPrefix << "k_ptrs_const(" <<
1542  (numOffsets-1) << ") = " << k_ptrs_const_numOffsetsMinus1
1543  << " != k_vals.extent(0) = " << k_vals.extent (0) << ".");
1544  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1545  (k_ptrs_const_numOffsetsMinus1 != size_t (k_inds.extent (0)),
1546  std::logic_error, myPrefix << "k_ptrs_const(" <<
1547  (numOffsets-1) << ") = " << k_ptrs_const_numOffsetsMinus1
1548  << " != k_inds.extent(0) = " << k_inds.extent (0) << ".");
1549  }
1550  }
1551 
1552  // May we ditch the old allocations for the packed (and otherwise
1553  // "optimized") allocations, later in this routine? Optimize
1554  // storage if the graph is not static, or if the graph already has
1555  // optimized storage.
1556  const bool defaultOptStorage =
1557  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
      // With a null params the default decides; otherwise the "Optimize
      // Storage" entry is read with that default as the fallback argument.
1558  const bool requestOptimizedStorage =
1559  (! params.is_null () &&
1560  params->get ("Optimize Storage", defaultOptStorage)) ||
1561  (params.is_null () && defaultOptStorage);
1562 
1563  // The graph has optimized storage when indices are allocated,
1564  // myGraph_->k_numRowEntries_ is empty, and there are more than
1565  // zero rows on this process. It's impossible for the graph to
1566  // have dynamic profile (getProfileType() == DynamicProfile) and
1567  // be optimized (isStorageOptimized()).
1568  if (requestOptimizedStorage) {
1569  // Free the old, unpacked, unoptimized allocations.
1570  // Change the graph from dynamic to static allocation profile
1571 
1572  // Free graph data structures that are only needed for
1573  // unpacked 1-D storage.
1574  if (verbose) {
1575  std::ostringstream os;
1576  os << *prefix << "Optimizing storage: free k_numRowEntries_: "
1577  << myGraph_->k_numRowEntries_.extent(0) << endl;
1578  std::cerr << os.str();
1579  }
1580  myGraph_->k_numRowEntries_ = row_entries_type ();
1581 
1582  // Keep the new 1-D packed allocations.
1583  if (verbose) {
1584  std::ostringstream os;
1585  os << *prefix << "Assign k_rowPtrs_: old="
1586  << myGraph_->k_rowPtrs_.extent(0) << ", new="
1587  << k_ptrs_const.extent(0) << endl;
1588  std::cerr << os.str();
1589  }
1590  myGraph_->k_rowPtrs_ = k_ptrs_const;
1591  if (verbose) {
1592  std::ostringstream os;
1593  os << *prefix << "Assign k_lclInds1D_: old="
1594  << myGraph_->k_lclInds1D_.extent(0) << ", new="
1595  << k_inds.extent(0) << endl;
1596  std::cerr << os.str();
1597  }
1598  myGraph_->k_lclInds1D_ = k_inds;
1599  if (verbose) {
1600  std::ostringstream os;
1601  os << *prefix << "Assign k_values1D_: old="
1602  << k_values1D_.extent(0) << ", new="
1603  << k_vals.extent(0) << endl;
1604  std::cerr << os.str();
1605  }
1606  this->k_values1D_ = k_vals;
1607 
1608  myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1609  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1610  }
1611  else {
1612  if (verbose) {
1613  std::ostringstream os;
      // NOTE(review): "requestetd" in the message below is a typo for
      // "requested"; it is a runtime string, so it is left untouched here.
1614  os << *prefix << "User requestetd NOT to optimize storage"
1615  << endl;
1616  std::cerr << os.str();
1617  }
1618  }
1619 
1620  // Make the local graph, using the arrays of row offsets and
1621  // column indices that we built above. The local graph should be
1622  // null, but we delete it first so that any memory can be freed
1623  // before we allocate the new one.
1624  //
1625  // FIXME (mfh 06,28 Aug 2014) It would make more sense for
1626  // Tpetra::CrsGraph to have a protected method that accepts k_inds
1627  // and k_ptrs, and creates the local graph lclGraph_.
1628  myGraph_->lclGraph_ =
1629  typename Graph::local_graph_type (k_inds, k_ptrs_const);
1630 
1631  // Make the local matrix, using the local graph and vals array.
1632  auto lclMat = std::make_shared<local_matrix_type>
1633  ("Tpetra::CrsMatrix::lclMatrix_", getNodeNumCols (),
1634  k_vals, myGraph_->lclGraph_);
1635  lclMatrix_ = std::make_shared<local_multiply_op_type> (lclMat);
1636  }
1637 
1638  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1639  void
1641  fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1642  {
1643  using ::Tpetra::Details::ProfilingRegion;
1644  using Kokkos::create_mirror_view;
1645  using Teuchos::ArrayRCP;
1646  using Teuchos::Array;
1647  using Teuchos::null;
1648  using Teuchos::RCP;
1649  using Teuchos::rcp;
1650  using std::endl;
1651  using row_map_type = typename Graph::local_graph_type::row_map_type;
1652  using non_const_row_map_type = typename row_map_type::non_const_type;
1653  using values_type = typename local_matrix_type::values_type;
1654  ProfilingRegion regionFLM("Tpetra::CrsMatrix::fillLocalMatrix");
1655  const size_t lclNumRows = getNodeNumRows();
1656 
1657  const bool verbose = Details::Behavior::verbose("CrsMatrix");
1658  std::unique_ptr<std::string> prefix;
1659  if (verbose) {
1660  prefix = this->createPrefix("CrsMatrix", "fillLocalMatrix");
1661  std::ostringstream os;
1662  os << *prefix << "lclNumRows: " << lclNumRows << endl;
1663  std::cerr << os.str ();
1664  }
1665 
1666  // The goals of this routine are first, to allocate and fill
1667  // packed 1-D storage (see below for an explanation) in the vals
1668  // array, and second, to give vals to the local matrix and
1669  // finalize the local matrix. We only need k_ptrs, the packed 1-D
1670  // row offsets, within the scope of this routine, since we're only
1671  // filling the local matrix here (use fillLocalGraphAndMatrix() to
1672  // fill both the graph and the matrix at the same time).
1673 
1674  // get data from staticGraph_
1675  size_t nodeNumEntries = staticGraph_->getNodeNumEntries ();
1676  size_t nodeNumAllocated = staticGraph_->getNodeAllocationSize ();
1677  row_map_type k_rowPtrs_ = staticGraph_->lclGraph_.row_map;
1678 
1679  row_map_type k_ptrs; // "packed" row offsets array
1680  values_type k_vals; // "packed" values array
1681 
1682  // May we ditch the old allocations for the packed (and otherwise
1683  // "optimized") allocations, later in this routine? Request
1684  // optimized storage by default.
1685  bool requestOptimizedStorage = true;
1686  const bool default_OptimizeStorage =
1687  ! isStaticGraph() || staticGraph_->isStorageOptimized();
1688  if (! params.is_null() &&
1689  ! params->get("Optimize Storage", default_OptimizeStorage)) {
1690  requestOptimizedStorage = false;
1691  }
1692  // If we're not allowed to change a static graph, then we can't
1693  // change the storage of the matrix, either. This means that if
1694  // the graph's storage isn't already optimized, we can't optimize
1695  // the matrix's storage either. Check and give warning, as
1696  // appropriate.
1697  if (! staticGraph_->isStorageOptimized () &&
1698  requestOptimizedStorage) {
1700  (true, std::runtime_error, "You requested optimized storage "
1701  "by setting the \"Optimize Storage\" flag to \"true\" in "
1702  "the ParameterList, or by virtue of default behavior. "
1703  "However, the associated CrsGraph was filled separately and "
1704  "requested not to optimize storage. Therefore, the "
1705  "CrsMatrix cannot optimize storage.");
1706  requestOptimizedStorage = false;
1707  }
1708 
1709  using row_entries_type = decltype (staticGraph_->k_numRowEntries_);
1710 
1711  // StaticProfile means that the matrix's values are currently
1712  // stored in a 1-D format. However, this format is "unpacked";
1713  // it doesn't necessarily have the same row offsets as indicated
1714  // by the ptrs array returned by allocRowPtrs. This could
1715  // happen, for example, if the user specified StaticProfile in
1716  // the constructor and fixed the number of matrix entries in
1717  // each row, but didn't fill all those entries.
1718  //
1719  // As above, we don't need to keep the "packed" row offsets
1720  // array ptrs here, but we do need it here temporarily, so we
1721  // have to allocate it. We'll free ptrs later in this method.
1722  //
1723  // Note that this routine checks whether storage has already
1724  // been packed. This is a common case for solution of nonlinear
1725  // PDEs using the finite element method, as long as the
1726  // structure of the sparse matrix does not change between linear
1727  // solves.
1728  if (nodeNumEntries != nodeNumAllocated) {
1729  if (verbose) {
1730  std::ostringstream os;
1731  os << *prefix << "Unpacked 1-D storage: numEnt="
1732  << nodeNumEntries << ", allocSize=" << nodeNumAllocated
1733  << endl;
1734  std::cerr << os.str();
1735  }
1736  // We have to pack the 1-D storage, since the user didn't fill
1737  // up all requested storage.
1738  if (verbose) {
1739  std::ostringstream os;
1740  os << *prefix << "Allocate packed row offsets: "
1741  << (lclNumRows+1) << endl;
1742  std::cerr << os.str();
1743  }
1744  non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1745  lclNumRows+1);
1746  // Total number of entries in the matrix on the calling
1747  // process. We will compute this in the loop below. It's
1748  // cheap to compute and useful as a sanity check.
1749  size_t lclTotalNumEntries = 0;
1750  k_ptrs = tmpk_ptrs;
1751  {
1752  typename row_entries_type::const_type numRowEnt_d =
1753  staticGraph_->k_numRowEntries_;
1754  // This function can handle the counts being a host View.
1755  lclTotalNumEntries =
1756  Details::computeOffsetsFromCounts (tmpk_ptrs, numRowEnt_d);
1757  }
1758 
1759  // Allocate the "packed" values array.
1760  // It has exactly the right number of entries.
1761  if (verbose) {
1762  std::ostringstream os;
1763  os << *prefix << "Allocate packed values: "
1764  << lclTotalNumEntries << endl;
1765  std::cerr << os.str ();
1766  }
1767  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1768 
1769  // Pack k_values1D_ into k_vals. We will replace k_values1D_ below.
1770  pack_functor<values_type, row_map_type> valsPacker
1771  (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_);
1772 
1773  using exec_space = typename decltype (k_vals)::execution_space;
1774  using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1775  Kokkos::parallel_for ("Tpetra::CrsMatrix pack values",
1776  range_type (0, lclNumRows), valsPacker);
1777  }
1778  else { // We don't have to pack, so just set the pointer.
1779  if (verbose) {
1780  std::ostringstream os;
1781  os << *prefix << "Storage already packed: "
1782  << "k_values1D_: " << k_values1D_.extent(0) << endl;
1783  std::cerr << os.str();
1784  }
1785  k_vals = k_values1D_;
1786  }
1787 
1788  // May we ditch the old allocations for the packed one?
1789  if (requestOptimizedStorage) {
1790  // The user requested optimized storage, so we can dump the
1791  // unpacked 1-D storage, and keep the packed storage.
1792  k_values1D_ = k_vals;
1793  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1794  }
1795 
1796  // Build the local sparse matrix object. At this point, the local
1797  // matrix certainly has a column Map. Remember that the local
1798  // matrix's number of columns comes from the column Map, not the
1799  // domain Map.
1800  auto lclMat = std::make_shared<local_matrix_type>
1801  ("Tpetra::CrsMatrix::lclMatrix_",
1802  getColMap ()->getNodeNumElements (),
1803  k_vals, staticGraph_->getLocalGraph ());
1804  lclMatrix_ = std::make_shared<local_multiply_op_type> (lclMat);
1805  }
1806 
1807  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1808  void
1810  insertIndicesAndValues (crs_graph_type& graph,
1811  RowInfo& rowInfo,
1812  const typename crs_graph_type::SLocalGlobalViews& newInds,
1813  const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1814  const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1815  const ELocalGlobal lg,
1816  const ELocalGlobal I)
1817  {
1818  const size_t oldNumEnt = rowInfo.numEntries;
1819  const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
1820 
1821  // Use of memcpy here works around an issue with GCC >= 4.9.0,
1822  // that probably relates to scalar_type vs. impl_scalar_type
1823  // aliasing. See history of Tpetra_CrsGraph_def.hpp for
1824  // details; look for GCC_WORKAROUND macro definition.
1825  if (numInserted > 0) {
1826  const size_t startOffset = oldNumEnt;
1827  memcpy (&oldRowVals[startOffset], &newRowVals[0],
1828  numInserted * sizeof (impl_scalar_type));
1829  }
1830  }
1831 
1832  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1833  void
1835  insertLocalValues (const LocalOrdinal lclRow,
1836  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1837  const Teuchos::ArrayView<const Scalar>& values)
1838  {
1839  using std::endl;
1840  typedef impl_scalar_type IST;
1841  const char tfecfFuncName[] = "insertLocalValues: ";
1842 
1843  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1844  (! this->isFillActive (), std::runtime_error,
1845  "Fill is not active. After calling fillComplete, you must call "
1846  "resumeFill before you may insert entries into the matrix again.");
1847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1848  (this->isStaticGraph (), std::runtime_error,
1849  "Cannot insert indices with static graph; use replaceLocalValues() "
1850  "instead.");
1851  // At this point, we know that myGraph_ is nonnull.
1852  crs_graph_type& graph = * (this->myGraph_);
1853  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1854  (graph.colMap_.is_null (), std::runtime_error,
1855  "Cannot insert local indices without a column map.");
1856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1857  (graph.isGloballyIndexed (),
1858  std::runtime_error, "Graph indices are global; use "
1859  "insertGlobalValues().");
1860  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1861  (values.size () != indices.size (), std::runtime_error,
1862  "values.size() = " << values.size ()
1863  << " != indices.size() = " << indices.size () << ".");
1864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1865  ! graph.rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1866  "Local row index " << lclRow << " does not belong to this process.");
1867 
1868  if (! graph.indicesAreAllocated ()) {
1869  // We only allocate values at most once per process, so it's OK
1870  // to check TPETRA_VERBOSE here.
1871  const bool verbose = Details::Behavior::verbose("CrsMatrix");
1872  this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
1873  }
1874 
1875 #ifdef HAVE_TPETRA_DEBUG
1876  const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
1877  // In a debug build, test whether any of the given column indices
1878  // are not in the column Map. Keep track of the invalid column
1879  // indices so we can tell the user about them.
1880  {
1881  using Teuchos::toString;
1882 
1883  const map_type& colMap = * (graph.colMap_);
1884  Teuchos::Array<LocalOrdinal> badColInds;
1885  bool allInColMap = true;
1886  for (size_t k = 0; k < numEntriesToAdd; ++k) {
1887  if (! colMap.isNodeLocalElement (indices[k])) {
1888  allInColMap = false;
1889  badColInds.push_back (indices[k]);
1890  }
1891  }
1892  if (! allInColMap) {
1893  std::ostringstream os;
1894  os << "You attempted to insert entries in owned row " << lclRow
1895  << ", at the following column indices: " << toString (indices)
1896  << "." << endl;
1897  os << "Of those, the following indices are not in the column Map on "
1898  "this process: " << toString (badColInds) << "." << endl << "Since "
1899  "the matrix has a column Map already, it is invalid to insert "
1900  "entries at those locations.";
1901  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1902  (true, std::invalid_argument, os.str ());
1903  }
1904  }
1905 #endif // HAVE_TPETRA_DEBUG
1906 
1907  RowInfo rowInfo = graph.getRowInfo (lclRow);
1908 
1909  Teuchos::ArrayView<IST> valsView = this->getViewNonConst(rowInfo);
1910  auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset) {
1911  valsView[offset] += values[k]; };
1912  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1913  graph.insertLocalIndicesImpl(lclRow, indices, cb);
1914  }
1915 
1916  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1917  void
1919  insertLocalValues (const LocalOrdinal localRow,
1920  const LocalOrdinal numEnt,
1921  const Scalar vals[],
1922  const LocalOrdinal cols[])
1923  {
1924  Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1925  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1926  this->insertLocalValues (localRow, colsT, valsT);
1927  }
1928 
1929  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1930  void
1932  insertGlobalValuesImpl (crs_graph_type& graph,
1933  RowInfo& rowInfo,
1934  const GlobalOrdinal gblColInds[],
1935  const impl_scalar_type vals[],
1936  const size_t numInputEnt)
1937  {
1938  typedef impl_scalar_type IST;
1939 #ifdef HAVE_TPETRA_DEBUG
1940  const char tfecfFuncName[] = "insertGlobalValuesImpl: ";
1941  const size_t origNumEnt = graph.getNumEntriesInLocalRow (rowInfo.localRow);
1942  const size_t curNumEnt = rowInfo.numEntries;
1943 #endif // HAVE_TPETRA_DEBUG
1944 
1945  if (! graph.indicesAreAllocated ()) {
1946  // We only allocate values at most once per process, so it's OK
1947  // to check TPETRA_VERBOSE here.
1948  using ::Tpetra::Details::Behavior;
1949  const bool verbose = Behavior::verbose("CrsMatrix");
1950  this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
1951  // mfh 23 Jul 2017: allocateValues invalidates existing
1952  // getRowInfo results. Once we get rid of lazy graph
1953  // allocation, we'll be able to move the getRowInfo call outside
1954  // of this method.
1955  rowInfo = graph.getRowInfo (rowInfo.localRow);
1956  }
1957 
1958  Teuchos::ArrayView<IST> valsView = this->getViewNonConst(rowInfo);
1959  auto fun = [&](size_t const k, size_t const /*start*/, size_t const offset) {
1960  valsView[offset] += vals[k];
1961  };
1962  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
1963 #ifdef HAVE_TPETRA_DEBUG
1964  //numInserted is only used inside the debug code below.
1965  auto numInserted =
1966 #endif
1967  graph.insertGlobalIndicesImpl(rowInfo, gblColInds, numInputEnt, cb);
1968 
1969 #ifdef HAVE_TPETRA_DEBUG
1970  size_t newNumEnt = curNumEnt + numInserted;
1971  const size_t chkNewNumEnt =
1972  graph.getNumEntriesInLocalRow (rowInfo.localRow);
1973  if (chkNewNumEnt != newNumEnt) {
1974  std::ostringstream os;
1975  os << std::endl << "newNumEnt = " << newNumEnt
1976  << " != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1977  << ") = " << chkNewNumEnt << "." << std::endl
1978  << "\torigNumEnt: " << origNumEnt << std::endl
1979  << "\tnumInputEnt: " << numInputEnt << std::endl
1980  << "\tgblColInds: [";
1981  for (size_t k = 0; k < numInputEnt; ++k) {
1982  os << gblColInds[k];
1983  if (k + size_t (1) < numInputEnt) {
1984  os << ",";
1985  }
1986  }
1987  os << "]" << std::endl
1988  << "\tvals: [";
1989  for (size_t k = 0; k < numInputEnt; ++k) {
1990  os << vals[k];
1991  if (k + size_t (1) < numInputEnt) {
1992  os << ",";
1993  }
1994  }
1995  os << "]" << std::endl;
1996 
1997  if (this->supportsRowViews ()) {
1998  Teuchos::ArrayView<const Scalar> vals2;
1999  if (this->isGloballyIndexed ()) {
2000  Teuchos::ArrayView<const GlobalOrdinal> gblColInds2;
2001  const GlobalOrdinal gblRow =
2002  graph.rowMap_->getGlobalElement (rowInfo.localRow);
2003  if (gblRow == Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
2004  os << "Local row index " << rowInfo.localRow << " is invalid!" << std::endl;
2005  }
2006  else {
2007  bool getViewThrew = false;
2008  try {
2009  this->getGlobalRowView (gblRow, gblColInds2, vals2);
2010  }
2011  catch (std::exception& e) {
2012  getViewThrew = true;
2013  os << "getGlobalRowView threw exception:" << std::endl
2014  << e.what () << std::endl;
2015  }
2016  if (! getViewThrew) {
2017  os << "\tNew global column indices: "
2018  << Teuchos::toString (gblColInds2) << std::endl
2019  << "\tNew values: " << Teuchos::toString (vals2) << std::endl;
2020  }
2021  }
2022  }
2023  else if (this->isLocallyIndexed ()) {
2024  Teuchos::ArrayView<const LocalOrdinal> lclColInds2;
2025  this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
2026  os << "\tNew local column indices: " << Teuchos::toString (lclColInds2)
2027  << std::endl;
2028  os << "\tNew values: " << Teuchos::toString (vals2) << std::endl;
2029  }
2030  }
2031 
2032  os << "Please report this bug to the Tpetra developers.";
2033  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2034  (true, std::logic_error, os.str ());
2035  }
2036 #endif // HAVE_TPETRA_DEBUG
2037  }
2038 
2039  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2040  void
2042  insertGlobalValues (const GlobalOrdinal gblRow,
2043  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2044  const Teuchos::ArrayView<const Scalar>& values)
2045  {
2046  using Teuchos::toString;
2047  using std::endl;
2048  typedef impl_scalar_type IST;
2049  typedef LocalOrdinal LO;
2050  typedef GlobalOrdinal GO;
2051  typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2052  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
2053  const char tfecfFuncName[] = "insertGlobalValues: ";
2054 
2055 #ifdef HAVE_TPETRA_DEBUG
2056  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2057  (values.size () != indices.size (), std::runtime_error,
2058  "values.size() = " << values.size () << " != indices.size() = "
2059  << indices.size () << ".");
2060 #endif // HAVE_TPETRA_DEBUG
2061 
2062  // getRowMap() is not thread safe, because it increments RCP's
2063  // reference count. getCrsGraphRef() is thread safe.
2064  const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2065  const LO lclRow = rowMap.getLocalElement (gblRow);
2066 
2067  if (lclRow == OTLO::invalid ()) {
2068  // Input row is _not_ owned by the calling process.
2069  //
2070  // See a note (now deleted) from mfh 14 Dec 2012: If input row
2071  // is not in the row Map, it doesn't matter whether or not the
2072  // graph is static; the data just get stashed for later use by
2073  // globalAssemble().
2074  this->insertNonownedGlobalValues (gblRow, indices, values);
2075  }
2076  else { // Input row _is_ owned by the calling process
2077  if (this->isStaticGraph ()) {
2078  // Uh oh! Not allowed to insert into owned rows in that case.
2079  const int myRank = rowMap.getComm ()->getRank ();
2080  const int numProcs = rowMap.getComm ()->getSize ();
2081  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2082  (true, std::runtime_error,
2083  "The matrix was constructed with a constant (\"static\") graph, "
2084  "yet the given global row index " << gblRow << " is in the row "
2085  "Map on the calling process (with rank " << myRank << ", of " <<
2086  numProcs << " process(es)). In this case, you may not insert "
2087  "new entries into rows owned by the calling process.");
2088  }
2089 
2090  crs_graph_type& graph = * (this->myGraph_);
2091  const IST* const inputVals =
2092  reinterpret_cast<const IST*> (values.getRawPtr ());
2093  const GO* const inputGblColInds = indices.getRawPtr ();
2094  const size_t numInputEnt = indices.size ();
2095  RowInfo rowInfo = graph.getRowInfo (lclRow);
2096 
2097  // If the matrix has a column Map, check at this point whether
2098  // the column indices belong to the column Map.
2099  //
2100  // FIXME (mfh 16 May 2013) We may want to consider deferring the
2101  // test to the CrsGraph method, since it may have to do this
2102  // anyway.
2103  if (! graph.colMap_.is_null ()) {
2104  const map_type& colMap = * (graph.colMap_);
2105  // In a debug build, keep track of the nonowned ("bad") column
2106  // indices, so that we can display them in the exception
2107  // message. In a release build, just ditch the loop early if
2108  // we encounter a nonowned column index.
2109 #ifdef HAVE_TPETRA_DEBUG
2110  Teuchos::Array<GO> badColInds;
2111 #endif // HAVE_TPETRA_DEBUG
2112  const size_type numEntriesToInsert = indices.size ();
2113  bool allInColMap = true;
2114  for (size_type k = 0; k < numEntriesToInsert; ++k) {
2115  if (! colMap.isNodeGlobalElement (indices[k])) {
2116  allInColMap = false;
2117 #ifdef HAVE_TPETRA_DEBUG
2118  badColInds.push_back (indices[k]);
2119 #else
2120  break;
2121 #endif // HAVE_TPETRA_DEBUG
2122  }
2123  }
2124  if (! allInColMap) {
2125  std::ostringstream os;
2126  os << "You attempted to insert entries in owned row " << gblRow
2127  << ", at the following column indices: " << toString (indices)
2128  << "." << endl;
2129 #ifdef HAVE_TPETRA_DEBUG
2130  os << "Of those, the following indices are not in the column Map "
2131  "on this process: " << toString (badColInds) << "." << endl
2132  << "Since the matrix has a column Map already, it is invalid "
2133  "to insert entries at those locations.";
2134 #else
2135  os << "At least one of those indices is not in the column Map "
2136  "on this process." << endl << "It is invalid to insert into "
2137  "columns not in the column Map on the process that owns the "
2138  "row.";
2139 #endif // HAVE_TPETRA_DEBUG
2140  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2141  (true, std::invalid_argument, os.str ());
2142  }
2143  }
2144 
2145  this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2146  inputVals, numInputEnt);
2147  }
2148  }
2149 
2150 
2151  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2152  void
2154  insertGlobalValues (const GlobalOrdinal globalRow,
2155  const LocalOrdinal numEnt,
2156  const Scalar vals[],
2157  const GlobalOrdinal inds[])
2158  {
2159  Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2160  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2161  this->insertGlobalValues (globalRow, indsT, valsT);
2162  }
2163 
2164 
2165  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2166  void
2169  const GlobalOrdinal gblRow,
2170  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2171  const Teuchos::ArrayView<const Scalar>& values,
2172  const bool debug)
2173  {
2174  typedef impl_scalar_type IST;
2175  typedef LocalOrdinal LO;
2176  typedef GlobalOrdinal GO;
2177  typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2178  const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
2179 
2180  if (debug) {
2181  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2182  (values.size () != indices.size (), std::runtime_error,
2183  "values.size() = " << values.size () << " != indices.size() = "
2184  << indices.size () << ".");
2185  }
2186 
2187  // getRowMap() is not thread safe, because it increments RCP's
2188  // reference count. getCrsGraphRef() is thread safe.
2189  const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2190  const LO lclRow = rowMap.getLocalElement (gblRow);
2191  if (lclRow == OTLO::invalid ()) {
2192  // Input row is _not_ owned by the calling process.
2193  //
2194  // See a note (now deleted) from mfh 14 Dec 2012: If input row
2195  // is not in the row Map, it doesn't matter whether or not the
2196  // graph is static; the data just get stashed for later use by
2197  // globalAssemble().
2198  this->insertNonownedGlobalValues (gblRow, indices, values);
2199  }
2200  else { // Input row _is_ owned by the calling process
2201  if (this->isStaticGraph ()) {
2202  // Uh oh! Not allowed to insert into owned rows in that case.
2203  const int myRank = rowMap.getComm ()->getRank ();
2204  const int numProcs = rowMap.getComm ()->getSize ();
2205  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2206  (true, std::runtime_error,
2207  "The matrix was constructed with a constant (\"static\") graph, "
2208  "yet the given global row index " << gblRow << " is in the row "
2209  "Map on the calling process (with rank " << myRank << ", of " <<
2210  numProcs << " process(es)). In this case, you may not insert "
2211  "new entries into rows owned by the calling process.");
2212  }
2213 
2214  crs_graph_type& graph = * (this->myGraph_);
2215  const IST* const inputVals =
2216  reinterpret_cast<const IST*> (values.getRawPtr ());
2217  const GO* const inputGblColInds = indices.getRawPtr ();
2218  const size_t numInputEnt = indices.size ();
2219  RowInfo rowInfo = graph.getRowInfo (lclRow);
2220 
2221  if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2222  // This branch is similar in function to the following branch, but for
2223  // the special case that the target graph is locally indexed (and the
2224  // profile type is StaticProfile). In this case, we cannot simply filter
2225  // out global indices that don't exist on the receiving process and
2226  // insert the remaining (global) indices, but we must convert them (the
2227  // remaining global indices) to local and call `insertLocalValues`.
2228  const map_type& colMap = * (graph.colMap_);
2229  size_t curOffset = 0;
2230  while (curOffset < numInputEnt) {
2231  // Find a sequence of input indices that are in the column Map on the
2232  // calling process. Doing a sequence at a time, instead of one at a
2233  // time, amortizes some overhead.
2234  Teuchos::Array<LO> lclIndices;
2235  size_t endOffset = curOffset;
2236  for ( ; endOffset < numInputEnt; ++endOffset) {
2237  auto lclIndex = colMap.getLocalElement(inputGblColInds[endOffset]);
2238  if (lclIndex != OTLO::invalid())
2239  lclIndices.push_back(lclIndex);
2240  else
2241  break;
2242  }
2243  // curOffset, endOffset: half-exclusive range of indices in the column
2244  // Map on the calling process. If endOffset == curOffset, the range is
2245  // empty.
2246  const LO numIndInSeq = (endOffset - curOffset);
2247  if (numIndInSeq != 0) {
2248  this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
2249  }
2250  // Invariant before the increment line: Either endOffset ==
2251  // numInputEnt, or inputGblColInds[endOffset] is not in the column Map
2252  // on the calling process.
2253  if (debug) {
2254  const bool invariant = endOffset == numInputEnt ||
2255  colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2256  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2257  (! invariant, std::logic_error, std::endl << "Invariant failed!");
2258  }
2259  curOffset = endOffset + 1;
2260  }
2261  }
2262  else if (! graph.colMap_.is_null ()) { // We have a column Map.
2263  const map_type& colMap = * (graph.colMap_);
2264  size_t curOffset = 0;
2265  while (curOffset < numInputEnt) {
2266  // Find a sequence of input indices that are in the column
2267  // Map on the calling process. Doing a sequence at a time,
2268  // instead of one at a time, amortizes some overhead.
2269  size_t endOffset = curOffset;
2270  for ( ; endOffset < numInputEnt &&
2271  colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2272  ++endOffset)
2273  {}
2274  // curOffset, endOffset: half-exclusive range of indices in
2275  // the column Map on the calling process. If endOffset ==
2276  // curOffset, the range is empty.
2277  const LO numIndInSeq = (endOffset - curOffset);
2278  if (numIndInSeq != 0) {
2279  rowInfo = graph.getRowInfo(lclRow); // KDD 5/19 Need fresh RowInfo in each loop iteration
2280  this->insertGlobalValuesImpl (graph, rowInfo,
2281  inputGblColInds + curOffset,
2282  inputVals + curOffset,
2283  numIndInSeq);
2284  }
2285  // Invariant before the increment line: Either endOffset ==
2286  // numInputEnt, or inputGblColInds[endOffset] is not in the
2287  // column Map on the calling process.
2288  if (debug) {
2289  const bool invariant = endOffset == numInputEnt ||
2290  colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2291  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2292  (! invariant, std::logic_error, std::endl << "Invariant failed!");
2293  }
2294  curOffset = endOffset + 1;
2295  }
2296  }
2297  else { // we don't have a column Map.
2298  this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2299  inputVals, numInputEnt);
2300  }
2301  }
2302  }
2303 
2304  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2305  void
2306  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2307  insertGlobalValuesFilteredChecked(
2308  const GlobalOrdinal gblRow,
2309  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2310  const Teuchos::ArrayView<const Scalar>& values,
2311  const char* const prefix,
2312  const bool debug,
2313  const bool verbose)
2314  {
2316  using std::endl;
2317 
2318  try {
2319  insertGlobalValuesFiltered(gblRow, indices, values, debug);
2320  }
2321  catch(std::exception& e) {
2322  std::ostringstream os;
2323  if (verbose) {
2324  const size_t maxNumToPrint =
2326  os << *prefix << ": insertGlobalValuesFiltered threw an "
2327  "exception: " << e.what() << endl
2328  << "Global row index: " << gblRow << endl;
2329  verbosePrintArray(os, indices, "Global column indices",
2330  maxNumToPrint);
2331  os << endl;
2332  verbosePrintArray(os, values, "Values", maxNumToPrint);
2333  os << endl;
2334  }
2335  else {
2336  os << ": insertGlobalValuesFiltered threw an exception: "
2337  << e.what();
2338  }
2339  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
2340  }
2341  }
2342 
2343  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2344  LocalOrdinal
2345  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2346  replaceLocalValuesImpl (impl_scalar_type rowVals[],
2347  const crs_graph_type& graph,
2348  const RowInfo& rowInfo,
2349  const LocalOrdinal inds[],
2350  const impl_scalar_type newVals[],
2351  const LocalOrdinal numElts) const
2352  {
2353  typedef LocalOrdinal LO;
2354  typedef GlobalOrdinal GO;
2355  const bool sorted = graph.isSorted ();
2356 
2357  size_t hint = 0; // Guess for the current index k into rowVals
2358  LO numValid = 0; // number of valid local column indices
2359 
2360  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2361  // accurately, it assumes that the host execution space can
2362  // access data in both InputMemorySpace and ValsMemorySpace.
2363 
2364  if (graph.isLocallyIndexed ()) {
2365  // Get a view of the column indices in the row. This amortizes
2366  // the cost of getting the view over all the entries of inds.
2367  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2368 
2369  for (LO j = 0; j < numElts; ++j) {
2370  const LO lclColInd = inds[j];
2371  const size_t offset =
2372  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2373  lclColInd, hint, sorted);
2374  if (offset != rowInfo.numEntries) {
2375  rowVals[offset] = newVals[j];
2376  hint = offset + 1;
2377  ++numValid;
2378  }
2379  }
2380  }
2381  else if (graph.isGloballyIndexed ()) {
2382  if (graph.colMap_.is_null ()) {
2383  return Teuchos::OrdinalTraits<LO>::invalid ();
2384  }
2385  const map_type colMap = * (graph.colMap_);
2386 
2387  // Get a view of the column indices in the row. This amortizes
2388  // the cost of getting the view over all the entries of inds.
2389  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2390 
2391  for (LO j = 0; j < numElts; ++j) {
2392  const GO gblColInd = colMap.getGlobalElement (inds[j]);
2393  if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2394  const size_t offset =
2395  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2396  gblColInd, hint, sorted);
2397  if (offset != rowInfo.numEntries) {
2398  rowVals[offset] = newVals[j];
2399  hint = offset + 1;
2400  ++numValid;
2401  }
2402  }
2403  }
2404  }
2405  // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
2406  // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
2407  // to be neither locally nor globally indexed on a process.
2408  // This means that the graph or matrix has no entries on that
2409  // process. Epetra also works like this. It's related to lazy
2410  // allocation (on first insertion, not at graph / matrix
2411  // construction). Lazy allocation will go away because it is
2412  // not thread scalable.
2413 
2414  return numValid;
2415  }
2416 
// ArrayView overload of replaceLocalValues.
//
// Checks that lclCols and vals have the same length, then forwards the
// raw pointers and the entry count to the
// (row, count, values[], indices[]) overload and returns its result.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if the two views
// differ in length.
//
// NOTE(review): listing line 2419 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2417  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2418  LocalOrdinal
2420  replaceLocalValues (const LocalOrdinal localRow,
2421  const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2422  const Teuchos::ArrayView<const Scalar>& vals) const
2423  {
2424  typedef LocalOrdinal LO;
2425 
2426  const LO numInputEnt = static_cast<LO> (lclCols.size ());
      // One value per column index is required.
2427  if (static_cast<LO> (vals.size ()) != numInputEnt) {
2428  return Teuchos::OrdinalTraits<LO>::invalid ();
2429  }
2430  const LO* const inputInds = lclCols.getRawPtr ();
2431  const Scalar* const inputVals = vals.getRawPtr ();
      // Note the argument order of the raw overload: values, then indices.
2432  return this->replaceLocalValues (localRow, numInputEnt,
2433  inputVals, inputInds);
2434  }
2435 
// Kokkos::View overload that forwards to this->replaceLocalValues.
//
// NOTE(review): the lines carrying the class qualifier and the function
// name (listing lines 2437, 2439, 2440) are absent from this rendering;
// the name is presumably replaceLocalValues, judging from the call in
// the body -- confirm against the real header.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if inputInds and
// inputVals differ in extent(0); otherwise returns the result of the
// raw-pointer overload.
2436  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2438  local_ordinal_type
2441  const local_ordinal_type localRow,
2442  const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2443  const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals) const
2444  {
2445  using LO = local_ordinal_type;
      // NOTE(review): implicit conversion from extent(0) to LO here; the
      // sibling overloads in this file use an explicit static_cast.
2446  const LO numInputEnt = inputInds.extent(0);
2447  if (numInputEnt != static_cast<LO>(inputVals.extent(0))) {
2448  return Teuchos::OrdinalTraits<LO>::invalid();
2449  }
      // The raw overload takes Scalar*, so view the impl_scalar_type data
      // as Scalar (assumes the two types are layout-compatible -- TODO confirm).
2450  const Scalar* const inVals =
2451  reinterpret_cast<const Scalar*>(inputVals.data());
2452  return this->replaceLocalValues(localRow, numInputEnt,
2453  inVals, inputInds.data());
2454  }
2455 
// Raw-array overload of replaceLocalValues.
//
// localRow  : local row index, passed to graph.getRowInfo().
// numEnt    : number of entries in inputVals and inputCols.
// inputVals : values, one per entry of inputCols.
// inputCols : local column indices.
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if getRowInfo() reports an invalid local row;
//  - otherwise the result of replaceLocalValuesImpl().
//
// NOTE(review): listing line 2458 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2456  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2457  LocalOrdinal
2459  replaceLocalValues (const LocalOrdinal localRow,
2460  const LocalOrdinal numEnt,
2461  const Scalar inputVals[],
2462  const LocalOrdinal inputCols[]) const
2463  {
2464  typedef impl_scalar_type IST;
2465  typedef LocalOrdinal LO;
2466 
2467  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2468  // Fill must be active and the "nonconst" graph must exist.
2469  return Teuchos::OrdinalTraits<LO>::invalid ();
2470  }
2471  const crs_graph_type& graph = * (this->staticGraph_);
2472  const RowInfo rowInfo = graph.getRowInfo (localRow);
2473 
2474  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2475  // The calling process does not own this row, so it is not
2476  // allowed to modify its values.
2477  return static_cast<LO> (0);
2478  }
      // View of this row's stored values; the Impl method writes through it.
2479  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // The Impl method works on impl_scalar_type, so view the caller's
      // Scalar array as IST (assumes layout compatibility -- TODO confirm).
2480  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2481  return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2482  inputCols, inVals, numEnt);
2483  }
2484 
// Overwrite entries of one row, addressed by global column index.
//
// Wraps inds in a Teuchos::ArrayView and hands it to
// graph.findGlobalIndices() together with a callback. The callback
// receives (k, start, offset) and stores newVals[k] into rowVals[offset].
// Returns whatever findGlobalIndices() returns.
//
// NOTE(review): listing line 2487 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2485  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2486  LocalOrdinal
2488  replaceGlobalValuesImpl (impl_scalar_type rowVals[],
2489  const crs_graph_type& graph,
2490  const RowInfo& rowInfo,
2491  const GlobalOrdinal inds[],
2492  const impl_scalar_type newVals[],
2493  const LocalOrdinal numElts) const
2494  {
2495  Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
      // k indexes the input arrays; offset indexes rowVals. The second
      // argument (start) is unused here.
2496  auto fun =
2497  [&](size_t const k, size_t const /*start*/, size_t const offset) {
2498  rowVals[offset] = newVals[k];
2499  };
      // std::ref keeps the std::function from copying the lambda; fun
      // stays alive for the whole call below.
2500  std::function<void(size_t const, size_t const, size_t const)> cb(std::ref(fun));
2501  return graph.findGlobalIndices(rowInfo, indsT, cb);
2502  }
2503 
// ArrayView overload of replaceGlobalValues.
//
// Checks that inputGblColInds and inputVals have the same length, then
// forwards the raw pointers and the entry count to the
// (row, count, values[], indices[]) overload and returns its result.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if the two views
// differ in length.
//
// NOTE(review): listing line 2506 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2504  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2505  LocalOrdinal
2507  replaceGlobalValues (const GlobalOrdinal globalRow,
2508  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2509  const Teuchos::ArrayView<const Scalar>& inputVals) const
2510  {
2511  typedef LocalOrdinal LO;
2512 
2513  const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
      // One value per column index is required.
2514  if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2515  return Teuchos::OrdinalTraits<LO>::invalid ();
2516  }
      // Note the argument order of the raw overload: values, then indices.
2517  return this->replaceGlobalValues (globalRow, numInputEnt,
2518  inputVals.getRawPtr (),
2519  inputGblColInds.getRawPtr ());
2520  }
2521 
// Raw-array overload of replaceGlobalValues.
//
// globalRow       : global row index, passed to
//                   graph.getRowInfoFromGlobalRowIndex().
// numEnt          : number of entries in inputVals and inputGblColInds.
// inputVals       : values, one per entry of inputGblColInds.
// inputGblColInds : global column indices.
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if the row lookup reports an invalid local row;
//  - otherwise the result of replaceGlobalValuesImpl().
//
// NOTE(review): listing line 2524 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2522  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2523  LocalOrdinal
2525  replaceGlobalValues (const GlobalOrdinal globalRow,
2526  const LocalOrdinal numEnt,
2527  const Scalar inputVals[],
2528  const GlobalOrdinal inputGblColInds[]) const
2529  {
2530  typedef impl_scalar_type IST;
2531  typedef LocalOrdinal LO;
2532 
2533  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2534  // Fill must be active and the "nonconst" graph must exist.
2535  return Teuchos::OrdinalTraits<LO>::invalid ();
2536  }
2537  const crs_graph_type& graph = * (this->staticGraph_);
2538 
2539  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (globalRow);
2540  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2541  // The input global row has no valid local row on the calling
2542  // process, which means that the calling process replaced 0 entries.
2543  return static_cast<LO> (0);
2544  }
2545 
      // View of this row's stored values; the Impl method writes through it.
2546  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // The Impl method works on impl_scalar_type, so view the caller's
      // Scalar array as IST (assumes layout compatibility -- TODO confirm).
2547  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2548  return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2549  inputGblColInds, inVals, numEnt);
2550  }
2551 
// Kokkos::View overload that forwards to this->replaceGlobalValues.
//
// NOTE(review): the lines carrying the class qualifier and the function
// name (listing lines 2553, 2555, 2556) are absent from this rendering;
// the name is presumably replaceGlobalValues, judging from the call in
// the body -- confirm against the real header.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if inputInds and
// inputVals differ in extent(0); otherwise returns the result of the
// raw-pointer overload.
2552  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2554  local_ordinal_type
2557  const global_ordinal_type globalRow,
2558  const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2559  const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals) const
2560  {
2561  // We use static_assert here to check the template parameters,
2562  // rather than std::enable_if (e.g., on the return value, to
2563  // enable compilation only if the template parameters match the
2564  // desired attributes). This turns obscure link errors into
2565  // clear compilation errors. It also makes the return value a
2566  // lot easier to see.
      //
      // NOTE(review): no static_assert actually appears in this body, so
      // the comment above looks stale.
2567  using LO = local_ordinal_type;
2568  const LO numInputEnt = static_cast<LO>(inputInds.extent(0));
2569  if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
2570  return Teuchos::OrdinalTraits<LO>::invalid();
2571  }
      // The raw overload takes Scalar*, so view the impl_scalar_type data
      // as Scalar (assumes the two types are layout-compatible -- TODO confirm).
2572  const Scalar* const inVals =
2573  reinterpret_cast<const Scalar*>(inputVals.data());
2574  return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
2575  inputInds.data());
2576  }
2577 
// Add newVals[j] to the stored entry of one row whose column matches
// the global index inds[j], for j in [0, numElts).
//
// rowVals : values of the row being modified.
// graph   : graph that supplies the row's column indices.
// rowInfo : describes the row (its numEntries bounds the search).
// inds    : global column indices supplied by the caller.
// newVals : values to add, one per entry of inds.
// numElts : number of entries in inds and newVals.
// atomic  : if true, use Kokkos::atomic_add; otherwise plain "+=".
//
// Returns the number of input indices that were found in the row.
// Indices that are not found are skipped. When the graph is locally
// indexed, each global index is first converted with the column Map;
// indices the Map does not contain are skipped, and a null column Map
// yields a return value of 0.
//
// NOTE(review): listing line 2580 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2578  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2579  LocalOrdinal
2581  sumIntoGlobalValuesImpl (impl_scalar_type rowVals[],
2582  const crs_graph_type& graph,
2583  const RowInfo& rowInfo,
2584  const GlobalOrdinal inds[],
2585  const impl_scalar_type newVals[],
2586  const LocalOrdinal numElts,
2587  const bool atomic) const
2588  {
2589  typedef LocalOrdinal LO;
2590  typedef GlobalOrdinal GO;
2591 
2592  const bool sorted = graph.isSorted ();
2593 
2594  size_t hint = 0; // guess at the index's relative offset in the row
2595  LO numValid = 0; // number of valid input column indices
2596 
2597  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
2598  // accurately, it assumes that the host execution space can
2599  // access data in both InputMemorySpace and ValsMemorySpace.
2600 
2601  if (graph.isLocallyIndexed ()) {
2602  // NOTE (mfh 04 Nov 2015) Dereferencing an RCP or reading its
2603  // pointer does NOT change its reference count. Thus, this
2604  // code is still thread safe.
2605  if (graph.colMap_.is_null ()) {
2606  // NO input column indices are valid in this case, since if
2607  // the column Map is null on the calling process, then the
2608  // calling process owns no graph entries.
2609  return numValid;
2610  }
2611  const map_type& colMap = * (graph.colMap_);
2612 
2613  // Get a view of the column indices in the row. This amortizes
2614  // the cost of getting the view over all the entries of inds.
2615  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2616  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2617 
2618  for (LO j = 0; j < numElts; ++j) {
      // The row stores local indices, so convert the caller's global index.
2619  const LO lclColInd = colMap.getLocalElement (inds[j]);
2620  if (lclColInd != LINV) {
2621  const size_t offset =
2622  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2623  lclColInd, hint, sorted);
      // An offset equal to numEntries is treated as "not found".
2624  if (offset != rowInfo.numEntries) {
2625  if (atomic) {
2626  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2627  }
2628  else {
2629  rowVals[offset] += newVals[j];
2630  }
      // Start the next search just past this hit.
2631  hint = offset + 1;
2632  numValid++;
2633  }
2634  }
2635  }
2636  }
2637  else if (graph.isGloballyIndexed ()) {
2638  // Get a view of the column indices in the row. This amortizes
2639  // the cost of getting the view over all the entries of inds.
2640  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2641 
2642  for (LO j = 0; j < numElts; ++j) {
2643  const GO gblColInd = inds[j];
2644  const size_t offset =
2645  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2646  gblColInd, hint, sorted);
2647  if (offset != rowInfo.numEntries) {
2648  if (atomic) {
2649  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2650  }
2651  else {
2652  rowVals[offset] += newVals[j];
2653  }
2654  hint = offset + 1;
2655  numValid++;
2656  }
2657  }
2658  }
2659  // If the graph is neither locally nor globally indexed on the
2660  // calling process, that means the calling process has no graph
2661  // entries. Thus, none of the input column indices are valid.
2662 
2663  return numValid;
2664  }
2665 
// ArrayView overload of sumIntoGlobalValues.
//
// Checks that inputGblColInds and inputVals have the same length, then
// forwards the raw pointers, the entry count and the atomic flag to the
// (row, count, values[], indices[], atomic) overload and returns its
// result.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if the two views
// differ in length.
//
// NOTE(review): listing line 2668 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2666  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2667  LocalOrdinal
2669  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2670  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2671  const Teuchos::ArrayView<const Scalar>& inputVals,
2672  const bool atomic)
2673  {
2674  typedef LocalOrdinal LO;
2675 
2676  const LO numInputEnt = static_cast<LO> (inputGblColInds.size ());
      // One value per column index is required.
2677  if (static_cast<LO> (inputVals.size ()) != numInputEnt) {
2678  return Teuchos::OrdinalTraits<LO>::invalid ();
2679  }
      // Note the argument order of the raw overload: values, then indices.
2680  return this->sumIntoGlobalValues (gblRow, numInputEnt,
2681  inputVals.getRawPtr (),
2682  inputGblColInds.getRawPtr (),
2683  atomic);
2684  }
2685 
// Raw-array overload of sumIntoGlobalValues.
//
// gblRow          : global row index.
// numInputEnt     : number of entries in inputVals and inputGblColInds.
// inputVals       : values to add, one per entry of inputGblColInds.
// inputGblColInds : global column indices.
// atomic          : forwarded to sumIntoGlobalValuesImpl().
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - numInputEnt if the row lookup reports an invalid local row; in
//    that case the entries are handed to insertNonownedGlobalValues()
//    instead of being summed here;
//  - otherwise the result of sumIntoGlobalValuesImpl().
//
// Unlike the replace*/transform* methods in this file, this method is
// not const.
//
// NOTE(review): listing line 2688 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2686  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2687  LocalOrdinal
2689  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2690  const LocalOrdinal numInputEnt,
2691  const Scalar inputVals[],
2692  const GlobalOrdinal inputGblColInds[],
2693  const bool atomic)
2694  {
2695  typedef impl_scalar_type IST;
2696  typedef LocalOrdinal LO;
2697  typedef GlobalOrdinal GO;
2698 
2699  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2700  // Fill must be active and the "nonconst" graph must exist.
2701  return Teuchos::OrdinalTraits<LO>::invalid ();
2702  }
2703  const crs_graph_type& graph = * (this->staticGraph_);
2704 
2705  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2706  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2707  // mfh 23 Mar 2017, 26 Jul 2017: This branch may not be
2708  // thread safe in a debug build, in part because it uses
2709  // Teuchos::ArrayView, and in part because of the data structure
2710  // used to stash outgoing entries.
2711  using Teuchos::ArrayView;
      // Pass nullptr for an empty input so the ArrayView is never built
      // from a pointer together with a zero length.
2712  ArrayView<const GO> inputGblColInds_av(
2713  numInputEnt == 0 ? nullptr : inputGblColInds,
2714  numInputEnt);
2715  ArrayView<const Scalar> inputVals_av(
2716  numInputEnt == 0 ? nullptr :
2717  inputVals, numInputEnt);
2718  // gblRow is not in the row Map on the calling process, so stash
2719  // the given entries away in a separate data structure.
2720  // globalAssemble() (called during fillComplete()) will exchange
2721  // that data and sum it in using sumIntoGlobalValues().
2722  this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2723  inputVals_av);
2724  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2725  // since we won't know whether the given indices were valid
2726  // until globalAssemble (called in fillComplete) is called.
2727  // That's why insertNonownedGlobalValues doesn't return
2728  // anything. Just for consistency, I'll return the number of
2729  // entries that the user gave us.
2730  return numInputEnt;
2731  }
2732  else { // input row is in the row Map on the calling process
2733  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // The Impl method works on impl_scalar_type, so view the caller's
      // Scalar array as IST (assumes layout compatibility -- TODO confirm).
2734  const IST* const inVals = reinterpret_cast<const IST*> (inputVals);
2735  return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2736  inputGblColInds, inVals,
2737  numInputEnt, atomic);
2738  }
2739  }
2740 
// Row-level entry point of transformLocalValues: looks up the row, then
// delegates to the overload that takes the row's value array, the graph
// and the RowInfo.
//
// lclRow      : local row index, passed to graph.getRowInfo().
// numInputEnt : number of entries in inputVals and inputCols.
// inputVals   : input values, one per entry of inputCols.
// inputCols   : local column indices.
// f           : binary function forwarded to the delegate.
// atomic      : forwarded to the delegate.
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if getRowInfo() reports an invalid local row;
//  - otherwise the result of the delegate overload.
//
// NOTE(review): listing line 2743 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
2741  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2742  LocalOrdinal
2744  transformLocalValues (const LocalOrdinal lclRow,
2745  const LocalOrdinal numInputEnt,
2746  const impl_scalar_type inputVals[],
2747  const LocalOrdinal inputCols[],
2748  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2749  const bool atomic) const
2750  {
2751  using Tpetra::Details::OrdinalTraits;
2752  typedef LocalOrdinal LO;
2753 
2754  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2755  // Fill must be active and the "nonconst" graph must exist.
2756  return Teuchos::OrdinalTraits<LO>::invalid ();
2757  }
2758  const crs_graph_type& graph = * (this->staticGraph_);
2759  const RowInfo rowInfo = graph.getRowInfo (lclRow);
2760 
2761  if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2762  // The calling process does not own this row, so it is not
2763  // allowed to modify its values.
2764  return static_cast<LO> (0);
2765  }
      // View of this row's stored values; the delegate writes through it.
2766  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // Note the delegate's argument order: indices, then values.
2767  return this->transformLocalValues (curRowVals.data (), graph,
2768  rowInfo, inputCols, inputVals,
2769  numInputEnt, f, atomic);
2770  }
2771 
2772  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2773  LocalOrdinal
2774  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2775  transformGlobalValues (const GlobalOrdinal gblRow,
2776  const LocalOrdinal numInputEnt,
2777  const impl_scalar_type inputVals[],
2778  const GlobalOrdinal inputCols[],
2779  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2780  const bool atomic) const
2781  {
2782  using Tpetra::Details::OrdinalTraits;
2783  typedef LocalOrdinal LO;
2784 
2785  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2786  // Fill must be active and the "nonconst" graph must exist.
2787  return OrdinalTraits<LO>::invalid ();
2788  }
2789  const crs_graph_type& graph = * (this->staticGraph_);
2790  const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2791 
2792  if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2793  // The calling process does not own this row, so it is not
2794  // allowed to modify its values.
2795  return static_cast<LO> (0);
2796  }
2797  auto curRowVals = this->getRowViewNonConst (rowInfo);
2798  return this->transformGlobalValues (curRowVals.data (), graph,
2799  rowInfo, inputCols, inputVals,
2800  numInputEnt, f, atomic);
2801  }
2802 
2803  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2804  LocalOrdinal
2805  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2806  transformLocalValues (impl_scalar_type rowVals[],
2807  const crs_graph_type& graph,
2808  const RowInfo& rowInfo,
2809  const LocalOrdinal inds[],
2810  const impl_scalar_type newVals[],
2811  const LocalOrdinal numElts,
2812  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2813  const bool atomic) const
2814  {
2815  typedef impl_scalar_type ST;
2816  typedef LocalOrdinal LO;
2817  typedef GlobalOrdinal GO;
2818 
2819  //if (newVals.extent (0) != inds.extent (0)) {
2820  // The sizes of the input arrays must match.
2821  //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
2822  //}
2823  //const LO numElts = static_cast<LO> (inds.extent (0));
2824  const bool sorted = graph.isSorted ();
2825 
2826  LO numValid = 0; // number of valid input column indices
2827  size_t hint = 0; // Guess for the current index k into rowVals
2828 
2829  if (graph.isLocallyIndexed ()) {
2830  // Get a view of the column indices in the row. This amortizes
2831  // the cost of getting the view over all the entries of inds.
2832  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2833 
2834  for (LO j = 0; j < numElts; ++j) {
2835  const LO lclColInd = inds[j];
2836  const size_t offset =
2837  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2838  lclColInd, hint, sorted);
2839  if (offset != rowInfo.numEntries) {
2840  if (atomic) {
2841  // NOTE (mfh 30 Nov 2015) The commented-out code is
2842  // wrong because another thread may have changed
2843  // rowVals[offset] between those two lines of code.
2844  //
2845  //const ST newVal = f (rowVals[offset], newVals[j]);
2846  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2847 
2848  volatile ST* const dest = &rowVals[offset];
2849  (void) atomic_binary_function_update (dest, newVals[j], f);
2850  }
2851  else {
2852  // use binary function f
2853  rowVals[offset] = f (rowVals[offset], newVals[j]);
2854  }
2855  hint = offset + 1;
2856  ++numValid;
2857  }
2858  }
2859  }
2860  else if (graph.isGloballyIndexed ()) {
2861  // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
2862  // pointer does NOT change its reference count. Thus, this
2863  // code is still thread safe.
2864  if (graph.colMap_.is_null ()) {
2865  // NO input column indices are valid in this case. Either
2866  // the column Map hasn't been set yet (so local indices
2867  // don't exist yet), or the calling process owns no graph
2868  // entries.
2869  return numValid;
2870  }
2871  const map_type& colMap = * (graph.colMap_);
2872  // Get a view of the column indices in the row. This amortizes
2873  // the cost of getting the view over all the entries of inds.
2874  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2875 
2876  const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2877  for (LO j = 0; j < numElts; ++j) {
2878  const GO gblColInd = colMap.getGlobalElement (inds[j]);
2879  if (gblColInd != GINV) {
2880  const size_t offset =
2881  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2882  gblColInd, hint, sorted);
2883  if (offset != rowInfo.numEntries) {
2884  if (atomic) {
2885  // NOTE (mfh 30 Nov 2015) The commented-out code is
2886  // wrong because another thread may have changed
2887  // rowVals[offset] between those two lines of code.
2888  //
2889  //const ST newVal = f (rowVals[offset], newVals[j]);
2890  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2891 
2892  volatile ST* const dest = &rowVals[offset];
2893  (void) atomic_binary_function_update (dest, newVals[j], f);
2894  }
2895  else {
2896  // use binary function f
2897  rowVals[offset] = f (rowVals[offset], newVals[j]);
2898  }
2899  hint = offset + 1;
2900  numValid++;
2901  }
2902  }
2903  }
2904  }
2905  // If the graph is neither locally nor globally indexed on the
2906  // calling process, that means the calling process has no graph
2907  // entries. Thus, none of the input column indices are valid.
2908 
2909  return numValid;
2910  }
2911 
2912  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2913  LocalOrdinal
2914  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2915  transformGlobalValues (impl_scalar_type rowVals[],
2916  const crs_graph_type& graph,
2917  const RowInfo& rowInfo,
2918  const GlobalOrdinal inds[],
2919  const impl_scalar_type newVals[],
2920  const LocalOrdinal numElts,
2921  std::function<impl_scalar_type (const impl_scalar_type&, const impl_scalar_type&) > f,
2922  const bool atomic) const
2923  {
2924  typedef impl_scalar_type ST;
2925  typedef LocalOrdinal LO;
2926  typedef GlobalOrdinal GO;
2927 
2928  //if (newVals.extent (0) != inds.extent (0)) {
2929  // The sizes of the input arrays must match.
2930  //return Tpetra::Details::OrdinalTraits<LO>::invalid ();
2931  //}
2932  //const LO numElts = static_cast<LO> (inds.extent (0));
2933  const bool sorted = graph.isSorted ();
2934 
2935  LO numValid = 0; // number of valid input column indices
2936  size_t hint = 0; // Guess for the current index k into rowVals
2937 
2938  if (graph.isGloballyIndexed ()) {
2939  // Get a view of the column indices in the row. This amortizes
2940  // the cost of getting the view over all the entries of inds.
2941  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
2942 
2943  for (LO j = 0; j < numElts; ++j) {
2944  const GO gblColInd = inds[j];
2945  const size_t offset =
2946  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2947  gblColInd, hint, sorted);
2948  if (offset != rowInfo.numEntries) {
2949  if (atomic) {
2950  // NOTE (mfh 30 Nov 2015) The commented-out code is
2951  // wrong because another thread may have changed
2952  // rowVals[offset] between those two lines of code.
2953  //
2954  //const ST newVal = f (rowVals[offset], newVals[j]);
2955  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2956 
2957  volatile ST* const dest = &rowVals[offset];
2958  (void) atomic_binary_function_update (dest, newVals[j], f);
2959  }
2960  else {
2961  // use binary function f
2962  rowVals[offset] = f (rowVals[offset], newVals[j]);
2963  }
2964  hint = offset + 1;
2965  ++numValid;
2966  }
2967  }
2968  }
2969  else if (graph.isLocallyIndexed ()) {
2970  // NOTE (mfh 26 Nov 2015) Dereferencing an RCP or reading its
2971  // pointer does NOT change its reference count. Thus, this
2972  // code is still thread safe.
2973  if (graph.colMap_.is_null ()) {
2974  // NO input column indices are valid in this case. Either the
2975  // column Map hasn't been set yet (so local indices don't
2976  // exist yet), or the calling process owns no graph entries.
2977  return numValid;
2978  }
2979  const map_type& colMap = * (graph.colMap_);
2980  // Get a view of the column indices in the row. This amortizes
2981  // the cost of getting the view over all the entries of inds.
2982  auto colInds = graph.getLocalKokkosRowView (rowInfo);
2983 
2984  const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2985  for (LO j = 0; j < numElts; ++j) {
2986  const LO lclColInd = colMap.getLocalElement (inds[j]);
2987  if (lclColInd != LINV) {
2988  const size_t offset =
2989  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2990  lclColInd, hint, sorted);
2991  if (offset != rowInfo.numEntries) {
2992  if (atomic) {
2993  // NOTE (mfh 30 Nov 2015) The commented-out code is
2994  // wrong because another thread may have changed
2995  // rowVals[offset] between those two lines of code.
2996  //
2997  //const ST newVal = f (rowVals[offset], newVals[j]);
2998  //Kokkos::atomic_assign (&rowVals[offset], newVal);
2999 
3000  volatile ST* const dest = &rowVals[offset];
3001  (void) atomic_binary_function_update (dest, newVals[j], f);
3002  }
3003  else {
3004  // use binary function f
3005  rowVals[offset] = f (rowVals[offset], newVals[j]);
3006  }
3007  hint = offset + 1;
3008  numValid++;
3009  }
3010  }
3011  }
3012  }
3013  // If the graph is neither locally nor globally indexed on the
3014  // calling process, that means the calling process has no graph
3015  // entries. Thus, none of the input column indices are valid.
3016 
3017  return numValid;
3018  }
3019 
3020  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3021  LocalOrdinal
3022  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3023  sumIntoLocalValuesImpl (impl_scalar_type rowVals[],
3024  const crs_graph_type& graph,
3025  const RowInfo& rowInfo,
3026  const LocalOrdinal inds[],
3027  const impl_scalar_type newVals[],
3028  const LocalOrdinal numElts,
3029  const bool atomic) const
3030  {
3031  typedef LocalOrdinal LO;
3032  typedef GlobalOrdinal GO;
3033 
3034  const bool sorted = graph.isSorted ();
3035 
3036  size_t hint = 0; // Guess for the current index k into rowVals
3037  LO numValid = 0; // number of valid local column indices
3038 
3039  // NOTE (mfh 11 Oct 2015) This method assumes UVM. More
3040  // accurately, it assumes that the host execution space can
3041  // access data in both InputMemorySpace and ValsMemorySpace.
3042 
3043  if (graph.isLocallyIndexed ()) {
3044  // Get a view of the column indices in the row. This amortizes
3045  // the cost of getting the view over all the entries of inds.
3046  auto colInds = graph.getLocalKokkosRowView (rowInfo);
3047 
3048  for (LO j = 0; j < numElts; ++j) {
3049  const LO lclColInd = inds[j];
3050  const size_t offset =
3051  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3052  lclColInd, hint, sorted);
3053  if (offset != rowInfo.numEntries) {
3054  if (atomic) {
3055  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3056  }
3057  else {
3058  rowVals[offset] += newVals[j];
3059  }
3060  hint = offset + 1;
3061  ++numValid;
3062  }
3063  }
3064  }
3065  else if (graph.isGloballyIndexed ()) {
3066  if (graph.colMap_.is_null ()) {
3067  return Teuchos::OrdinalTraits<LO>::invalid ();
3068  }
3069  const map_type colMap = * (graph.colMap_);
3070 
3071  // Get a view of the column indices in the row. This amortizes
3072  // the cost of getting the view over all the entries of inds.
3073  auto colInds = graph.getGlobalKokkosRowView (rowInfo);
3074 
3075  for (LO j = 0; j < numElts; ++j) {
3076  const GO gblColInd = colMap.getGlobalElement (inds[j]);
3077  if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3078  const size_t offset =
3079  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3080  gblColInd, hint, sorted);
3081  if (offset != rowInfo.numEntries) {
3082  if (atomic) {
3083  Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3084  }
3085  else {
3086  rowVals[offset] += newVals[j];
3087  }
3088  hint = offset + 1;
3089  ++numValid;
3090  }
3091  }
3092  }
3093  }
3094  // NOTE (mfh 26 Jun 2014, 26 Nov 2015) In the current version of
3095  // CrsGraph and CrsMatrix, it's possible for a matrix (or graph)
3096  // to be neither locally nor globally indexed on a process.
3097  // This means that the graph or matrix has no entries on that
3098  // process. Epetra also works like this. It's related to lazy
3099  // allocation (on first insertion, not at graph / matrix
3100  // construction). Lazy allocation will go away because it is
3101  // not thread scalable.
3102 
3103  return numValid;
3104  }
3105 
// ArrayView overload of sumIntoLocalValues.
//
// Checks that indices and values have the same length, then forwards
// the raw pointers, the entry count and the atomic flag to the
// (row, count, values[], indices[], atomic) overload and returns its
// result.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if the two views
// differ in length.
//
// NOTE(review): listing line 3108 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
3106  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3107  LocalOrdinal
3109  sumIntoLocalValues (const LocalOrdinal localRow,
3110  const Teuchos::ArrayView<const LocalOrdinal>& indices,
3111  const Teuchos::ArrayView<const Scalar>& values,
3112  const bool atomic) const
3113  {
3114  using LO = local_ordinal_type;
3115  const LO numInputEnt = static_cast<LO>(indices.size());
      // One value per column index is required.
3116  if (static_cast<LO>(values.size()) != numInputEnt) {
3117  return Teuchos::OrdinalTraits<LO>::invalid();
3118  }
3119  const LO* const inputInds = indices.getRawPtr();
3120  const scalar_type* const inputVals = values.getRawPtr();
      // Note the argument order of the raw overload: values, then indices.
3121  return this->sumIntoLocalValues(localRow, numInputEnt,
3122  inputVals, inputInds, atomic);
3123  }
3124 
// Kokkos::View overload that forwards to this->sumIntoLocalValues.
//
// NOTE(review): the lines carrying the class qualifier and the function
// name (listing lines 3126, 3128, 3129) are absent from this rendering;
// the name is presumably sumIntoLocalValues, judging from the call in
// the body -- confirm against the real header.
//
// Returns Teuchos::OrdinalTraits<LO>::invalid() if inputInds and
// inputVals differ in extent(0); otherwise returns the result of the
// raw-pointer overload.
3125  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3127  local_ordinal_type
3130  const local_ordinal_type localRow,
3131  const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3132  const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3133  const bool atomic) const
3134  {
3135  using LO = local_ordinal_type;
3136  const LO numInputEnt = static_cast<LO>(inputInds.extent(0));
3137  if (static_cast<LO>(inputVals.extent(0)) != numInputEnt) {
3138  return Teuchos::OrdinalTraits<LO>::invalid();
3139  }
      // The raw overload takes scalar_type*, so view the impl_scalar_type
      // data as scalar_type (assumes layout compatibility -- TODO confirm).
3140  const scalar_type* inVals =
3141  reinterpret_cast<const scalar_type*>(inputVals.data());
3142  return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3143  inputInds.data(), atomic);
3144  }
3145 
// Raw-array overload of sumIntoLocalValues.
//
// localRow : local row index, passed to graph.getRowInfo().
// numEnt   : number of entries in vals and cols.
// vals     : values to add, one per entry of cols.
// cols     : local column indices.
// atomic   : forwarded to sumIntoLocalValuesImpl().
//
// Returns:
//  - Teuchos::OrdinalTraits<LO>::invalid() if fill is not active or
//    staticGraph_ is null;
//  - 0 if getRowInfo() reports an invalid local row;
//  - otherwise the result of sumIntoLocalValuesImpl().
//
// NOTE(review): listing line 3148 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
3146  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3147  LocalOrdinal
3149  sumIntoLocalValues (const LocalOrdinal localRow,
3150  const LocalOrdinal numEnt,
3151  const Scalar vals[],
3152  const LocalOrdinal cols[],
3153  const bool atomic) const
3154  {
3155  typedef impl_scalar_type IST;
3156  typedef LocalOrdinal LO;
3157 
3158  if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3159  // Fill must be active and the "nonconst" graph must exist.
3160  return Teuchos::OrdinalTraits<LO>::invalid ();
3161  }
3162  const crs_graph_type& graph = * (this->staticGraph_);
3163  const RowInfo rowInfo = graph.getRowInfo (localRow);
3164 
3165  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3166  // The calling process does not own this row, so it is not
3167  // allowed to modify its values.
3168  return static_cast<LO> (0);
3169  }
      // View of this row's stored values; the Impl method writes through it.
3170  auto curRowVals = this->getRowViewNonConst (rowInfo);
      // The Impl method works on impl_scalar_type, so view the caller's
      // Scalar array as IST (assumes layout compatibility -- TODO confirm).
3171  const IST* const inputVals = reinterpret_cast<const IST*> (vals);
3172  return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3173  cols, inputVals, numEnt, atomic);
3174  }
3175 
// Return a read-only Teuchos::ArrayView over the value storage of one row.
//
// The view covers k_values1D_[offset1D, offset1D + allocSize), i.e. the
// row's allocated size (allocSize), not its numEntries. An empty
// ArrayView is returned when k_values1D_ has extent 0 or
// rowinfo.allocSize is 0.
//
// In HAVE_TPETRA_DEBUG builds, throws std::range_error if the requested
// range runs past the end of k_values1D_.
//
// NOTE(review): listing line 3178 (presumably the "CrsMatrix<...>::"
// qualifier) is absent from this rendering of the file.
3176  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3177  Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type>
3179  getView (RowInfo rowinfo) const
3180  {
3181  using Kokkos::MemoryUnmanaged;
3182  using Kokkos::View;
3183  using Teuchos::ArrayView;
3184  using ST = impl_scalar_type;
3185  using range_type = std::pair<size_t, size_t>;
3186 
3187  if (k_values1D_.extent (0) != 0 && rowinfo.allocSize > 0) {
3188 #ifdef HAVE_TPETRA_DEBUG
3189  TEUCHOS_TEST_FOR_EXCEPTION(
3190  rowinfo.offset1D + rowinfo.allocSize > k_values1D_.extent (0),
3191  std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
3192  "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
3193  rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
3194  ") > k_values1D_.extent(0) (" << k_values1D_.extent (0) << ").");
3195 #endif // HAVE_TPETRA_DEBUG
3196  range_type range (rowinfo.offset1D, rowinfo.offset1D + rowinfo.allocSize);
3197  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
3198  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3199  // directly, because that first creates a _managed_ subview,
3200  // then returns an unmanaged version of that. That touches the
3201  // reference count, which costs performance in a measurable way.
3202  // Instead, we create a temporary unmanaged view, then create
3203  // the subview from that.
3204  subview_type sv = Kokkos::subview (subview_type (k_values1D_), range);
      // allocSize > 0 is already guaranteed by the enclosing condition,
      // so the nullptr arm of this conditional is never taken here.
3205  const ST* const sv_raw = (rowinfo.allocSize == 0) ? nullptr : sv.data ();
3206  return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
3207  }
3208  else {
      // Empty view; the non-const ArrayView converts to the const return type.
3209  return ArrayView<impl_scalar_type> ();
3210  }
3211  }
3212 
3213 
// Raw-pointer accessor for one row's value storage (const flavor).
//
// NOTE(review): the lines carrying the class qualifier, the function
// name and the first parameter (listing lines 3216, 3217) are absent
// from this rendering. The name is presumably getViewRawConst and the
// first parameter a "const impl_scalar_type*& vals" output, judging
// from the call in the following definition -- confirm against the
// real header.
//
// On success sets vals to k_values1D_.data() + rowinfo.offset1D and
// numEnt to rowinfo.allocSize, or to nullptr / 0 when k_values1D_ has
// extent 0 or rowinfo.allocSize is 0, and returns 0.
//
// In HAVE_TPETRA_DEBUG builds only: if the row's range runs past the
// end of k_values1D_, sets vals = nullptr, numEnt = 0 and returns
// Teuchos::OrdinalTraits<LocalOrdinal>::invalid().
3214  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3215  LocalOrdinal
3218  LocalOrdinal& numEnt,
3219  const RowInfo& rowinfo) const
3220  {
      // Turn the build-time macro into a constant so the bounds check
      // below is ordinary code in both build modes.
3221 #ifdef HAVE_TPETRA_DEBUG
3222  constexpr bool debug = true;
3223 #else
3224  constexpr bool debug = false;
3225 #endif // HAVE_TPETRA_DEBUG
3226 
3227  if (k_values1D_.extent (0) != 0 && rowinfo.allocSize > 0) {
3228  if (debug) {
3229  if (rowinfo.offset1D + rowinfo.allocSize > k_values1D_.extent (0)) {
3230  vals = nullptr;
3231  numEnt = 0;
3232  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
3233  }
3234  }
3235  vals = k_values1D_.data () + rowinfo.offset1D;
      // The count is the row's allocated size, not its numEntries.
3236  numEnt = rowinfo.allocSize;
3237  }
3238  else {
3239  vals = nullptr;
3240  numEnt = 0;
3241  }
3242 
3243  return static_cast<LocalOrdinal> (0);
3244  }
3245 
// Raw-pointer accessor for one row's value storage (non-const flavor).
//
// NOTE(review): the lines carrying the class qualifier, the function
// name and the first parameter (listing lines 3248, 3249) are absent
// from this rendering. The first parameter is presumably an
// "impl_scalar_type*& vals" output, judging from the assignment in the
// body -- confirm against the real header.
//
// Delegates to getViewRawConst() and strips constness from the pointer
// it produces. Returns getViewRawConst()'s error code unchanged.
3246  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3247  LocalOrdinal
3250  LocalOrdinal& numEnt,
3251  const RowInfo& rowinfo) const
3252  {
3253  const impl_scalar_type* valsConst;
3254  const LocalOrdinal err = this->getViewRawConst (valsConst, numEnt, rowinfo);
      // vals is assigned even when err signals failure; in that case
      // getViewRawConst has set the pointer to nullptr.
3255  vals = const_cast<impl_scalar_type*> (valsConst);
3256  return err;
3257  }
3258 
// Return a read-only, unmanaged Kokkos::View over the value storage of
// one row.
//
// The view covers k_values1D_[offset1D, offset1D + allocSize), i.e. the
// row's allocated size (allocSize), not its numEntries. A
// default-constructed view is returned when k_values1D_ has extent 0 or
// rowInfo.allocSize is 0.
//
// In HAVE_TPETRA_DEBUG builds, throws std::range_error if the requested
// range runs past the end of k_values1D_.
//
// NOTE(review): listing lines 3261 and 3263 (presumably the
// execution-space template argument of the return type and the
// "CrsMatrix<...>::" qualifier) are absent from this rendering.
3259  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3260  Kokkos::View<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type*,
3262  Kokkos::MemoryUnmanaged>
3264  getRowView (const RowInfo& rowInfo) const
3265  {
3266  using Kokkos::MemoryUnmanaged;
3267  using Kokkos::View;
3268  typedef impl_scalar_type ST;
3269  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
3270  typedef std::pair<size_t, size_t> range_type;
3271 
3272  if (k_values1D_.extent (0) != 0 && rowInfo.allocSize > 0) {
3273 #ifdef HAVE_TPETRA_DEBUG
3274  TEUCHOS_TEST_FOR_EXCEPTION
3275  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.extent (0),
3276  std::range_error, "Tpetra::CrsMatrix::getRowView: Invalid access "
3277  "to 1-D storage of values. rowInfo.offset1D ("
3278  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
3279  << ") > this->k_values1D_.extent(0) ("
3280  << this->k_values1D_.extent (0) << ").");
3281 #endif // HAVE_TPETRA_DEBUG
3282  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
3283  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3284  // directly, because that first creates a _managed_ subview,
3285  // then returns an unmanaged version of that. That touches the
3286  // reference count, which costs performance in a measurable way.
3287  // Instead, we create a temporary unmanaged view, then create
3288  // the subview from that.
3289  return Kokkos::subview (subview_type (this->k_values1D_), range);
3290  }
3291  else {
      // Nothing allocated for this row: return an empty view.
3292  return subview_type ();
3293  }
3294  }
3295 
3296  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3297  Kokkos::View<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type*,
3298  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::execution_space,
3299  Kokkos::MemoryUnmanaged>
3300  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
3301  getRowViewNonConst (const RowInfo& rowInfo) const
3302  {
3303  using Kokkos::MemoryUnmanaged;
3304  using Kokkos::View;
3305  typedef impl_scalar_type ST;
3306  typedef View<ST*, execution_space, MemoryUnmanaged> subview_type;
3307  typedef std::pair<size_t, size_t> range_type;
3308 
3309  if (k_values1D_.extent (0) != 0 && rowInfo.allocSize > 0) {
3310 #ifdef HAVE_TPETRA_DEBUG
3311  TEUCHOS_TEST_FOR_EXCEPTION
3312  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.extent (0),
3313  std::range_error, "Tpetra::CrsMatrix::getRowViewNonConst: Invalid "
3314  "access to 1-D storage of values. rowInfo.offset1D ("
3315  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
3316  << ") > this->k_values1D_.extent(0) ("
3317  << this->k_values1D_.extent (0) << ").");
3318 #endif // HAVE_TPETRA_DEBUG
3319  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
3320  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
3321  // directly, because that first creates a _managed_ subview,
3322  // then returns an unmanaged version of that. That touches the
3323  // reference count, which costs performance in a measurable way.
3324  // Instead, we create a temporary unmanaged view, then create
3325  // the subview from that.
3326  return Kokkos::subview (subview_type (this->k_values1D_), range);
3327  }
3328  else {
3329  return subview_type ();
3330  }
3331  }
3332 
3333  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3334  Teuchos::ArrayView<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::impl_scalar_type>
3336  getViewNonConst (const RowInfo& rowinfo) const
3337  {
3338  return Teuchos::av_const_cast<impl_scalar_type> (this->getView (rowinfo));
3339  }
3340 
3341  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3342  void
3344  getLocalRowCopy (LocalOrdinal localRow,
3345  const Teuchos::ArrayView<LocalOrdinal>& indices,
3346  const Teuchos::ArrayView<Scalar>& values,
3347  size_t& numEntries) const
3348  {
3349  using Teuchos::ArrayView;
3350  using Teuchos::av_reinterpret_cast;
3351  const char tfecfFuncName[] = "getLocalRowCopy: ";
3352 
3353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3354  (! this->hasColMap (), std::runtime_error,
3355  "The matrix does not have a column Map yet. This means we don't have "
3356  "local indices for columns yet, so it doesn't make sense to call this "
3357  "method. If the matrix doesn't have a column Map yet, you should call "
3358  "fillComplete on it first.");
3359 
3360  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3361  const size_t theNumEntries = rowinfo.numEntries;
3362  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3363  (static_cast<size_t> (indices.size ()) < theNumEntries ||
3364  static_cast<size_t> (values.size ()) < theNumEntries,
3365  std::runtime_error, "Row with local index " << localRow << " has " <<
3366  theNumEntries << " entry/ies, but indices.size() = " <<
3367  indices.size () << " and values.size() = " << values.size () << ".");
3368  numEntries = theNumEntries; // first side effect
3369 
3370  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3371  if (staticGraph_->isLocallyIndexed ()) {
3372  const LocalOrdinal* curLclInds;
3373  const impl_scalar_type* curVals;
3374  LocalOrdinal numSpots; // includes both current entries and extra space
3375 
3376  // If we got this far, rowinfo should be correct and should
3377  // refer to a valid local row. Thus, these error checks are
3378  // superfluous, but we retain them in a debug build.
3379 #ifdef HAVE_TPETRA_DEBUG
3380  int err =
3381  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3382  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3383  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3384  "staticGraph_->getLocalViewRawConst returned nonzero error code "
3385  << err << ".");
3386  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3387  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3388  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3389  << ".");
3390  const LocalOrdinal numSpotsBefore = numSpots;
3391  err = getViewRawConst (curVals, numSpots, rowinfo);
3392  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3393  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3394  "getViewRaw returned nonzero error code " << err << ".");
3395  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3396  (numSpotsBefore != numSpots, std::logic_error,
3397  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3398  << numSpots << ".");
3399 #else
3400  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3401  (void) getViewRawConst (curVals, numSpots, rowinfo);
3402 #endif // HAVE_TPETRA_DEBUG
3403 
3404  for (size_t j = 0; j < theNumEntries; ++j) {
3405  values[j] = curVals[j];
3406  indices[j] = curLclInds[j];
3407  }
3408  }
3409  else if (staticGraph_->isGloballyIndexed ()) {
3410  // Don't call getColMap(), because it touches RCP's reference count.
3411  const map_type& colMap = * (staticGraph_->colMap_);
3412  const GlobalOrdinal* curGblInds;
3413  const impl_scalar_type* curVals;
3414  LocalOrdinal numSpots; // includes both current entries and extra space
3415 
3416  // If we got this far, rowinfo should be correct and should
3417  // refer to a valid local row. Thus, these error checks are
3418  // superfluous, but we retain them in a debug build.
3419 #ifdef HAVE_TPETRA_DEBUG
3420  int err =
3421  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3422  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3423  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3424  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
3425  << err << ".");
3426  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3427  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3428  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3429  << ".");
3430  const LocalOrdinal numSpotsBefore = numSpots;
3431  err = getViewRawConst (curVals, numSpots, rowinfo);
3432  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3433  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3434  "getViewRawConst returned nonzero error code " << err << ".");
3435  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3436  (numSpotsBefore != numSpots, std::logic_error,
3437  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3438  << numSpots << ".");
3439 #else
3440  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3441  (void) getViewRawConst (curVals, numSpots, rowinfo);
3442 #endif //HAVE_TPETRA_DEBUG
3443 
3444  for (size_t j = 0; j < theNumEntries; ++j) {
3445  values[j] = curVals[j];
3446  indices[j] = colMap.getLocalElement (curGblInds[j]);
3447  }
3448  }
3449  }
3450  }
3451 
3452  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3453  void
3455  getGlobalRowCopy (GlobalOrdinal globalRow,
3456  const Teuchos::ArrayView<GlobalOrdinal>& indices,
3457  const Teuchos::ArrayView<Scalar>& values,
3458  size_t& numEntries) const
3459  {
3460  using Teuchos::ArrayView;
3461  using Teuchos::av_reinterpret_cast;
3462  const char tfecfFuncName[] = "getGlobalRowCopy: ";
3463 
3464  const RowInfo rowinfo =
3465  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3466  const size_t theNumEntries = rowinfo.numEntries;
3467  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3468  static_cast<size_t> (indices.size ()) < theNumEntries ||
3469  static_cast<size_t> (values.size ()) < theNumEntries,
3470  std::runtime_error, "Row with global index " << globalRow << " has "
3471  << theNumEntries << " entry/ies, but indices.size() = " <<
3472  indices.size () << " and values.size() = " << values.size () << ".");
3473  numEntries = theNumEntries; // first side effect
3474 
3475  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3476  if (staticGraph_->isLocallyIndexed ()) {
3477  const map_type& colMap = * (staticGraph_->colMap_);
3478  const LocalOrdinal* curLclInds;
3479  const impl_scalar_type* curVals;
3480  LocalOrdinal numSpots; // includes both current entries and extra space
3481 
3482  // If we got this far, rowinfo should be correct and should
3483  // refer to a valid local row. Thus, these error checks are
3484  // superfluous, but we retain them in a debug build.
3485 #ifdef HAVE_TPETRA_DEBUG
3486  int err =
3487  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3488  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3489  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3490  "staticGraph_->getLocalViewRawConst returned nonzero error code "
3491  << err << ".");
3492  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3493  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3494  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3495  << ".");
3496  const LocalOrdinal numSpotsBefore = numSpots;
3497  err = getViewRawConst (curVals, numSpots, rowinfo);
3498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3499  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3500  "getViewRaw returned nonzero error code " << err << ".");
3501  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3502  (numSpotsBefore != numSpots, std::logic_error,
3503  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3504  << numSpots << ".");
3505 #else
3506  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
3507  (void) getViewRawConst (curVals, numSpots, rowinfo);
3508 #endif //HAVE_TPETRA_DEBUG
3509 
3510  for (size_t j = 0; j < theNumEntries; ++j) {
3511  values[j] = curVals[j];
3512  indices[j] = colMap.getGlobalElement (curLclInds[j]);
3513  }
3514  }
3515  else if (staticGraph_->isGloballyIndexed ()) {
3516  const GlobalOrdinal* curGblInds;
3517  const impl_scalar_type* curVals;
3518  LocalOrdinal numSpots; // includes both current entries and extra space
3519 
3520  // If we got this far, rowinfo should be correct and should
3521  // refer to a valid local row. Thus, these error checks are
3522  // superfluous, but we retain them in a debug build.
3523 #ifdef HAVE_TPETRA_DEBUG
3524  int err =
3525  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3526  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3527  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3528  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
3529  << err << ".");
3530  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3531  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
3532  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
3533  << ".");
3534  const LocalOrdinal numSpotsBefore = numSpots;
3535  err = getViewRawConst (curVals, numSpots, rowinfo);
3536  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3537  (err != static_cast<LocalOrdinal> (0), std::logic_error,
3538  "getViewRawConst returned nonzero error code " << err << ".");
3539  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3540  (numSpotsBefore != numSpots, std::logic_error,
3541  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
3542  << numSpots << ".");
3543 #else
3544  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
3545  (void) getViewRawConst (curVals, numSpots, rowinfo);
3546 #endif //HAVE_TPETRA_DEBUG
3547 
3548  for (size_t j = 0; j < theNumEntries; ++j) {
3549  values[j] = curVals[j];
3550  indices[j] = curGblInds[j];
3551  }
3552  }
3553  }
3554  }
3555 
3556  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3557  void
3559  getLocalRowView (LocalOrdinal localRow,
3560  Teuchos::ArrayView<const LocalOrdinal>& indices,
3561  Teuchos::ArrayView<const Scalar>& values) const
3562  {
3563  using Teuchos::ArrayView;
3564  using Teuchos::av_reinterpret_cast;
3565  typedef LocalOrdinal LO;
3566  const char tfecfFuncName[] = "getLocalRowView: ";
3567 
3568  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3569  isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
3570  "its indices as global indices, so you cannot get a view with local "
3571  "column indices. If the matrix has a column Map, you may call "
3572  "getLocalRowCopy() to get local column indices; otherwise, you may get "
3573  "a view with global column indices by calling getGlobalRowCopy().");
3574  indices = Teuchos::null;
3575  values = Teuchos::null;
3576  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3577  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3578  rowinfo.numEntries > 0) {
3579  ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
3580  ArrayView<const Scalar> valTmp =
3581  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
3582  indices = indTmp (0, rowinfo.numEntries);
3583  values = valTmp (0, rowinfo.numEntries);
3584  }
3585 
3586 #ifdef HAVE_TPETRA_DEBUG
3587  const char suffix[] = ". This should never happen. Please report this "
3588  "bug to the Tpetra developers.";
3589  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3590  (static_cast<size_t> (indices.size ()) !=
3591  static_cast<size_t> (values.size ()), std::logic_error,
3592  "At the end of this method, for local row " << localRow << ", "
3593  "indices.size() = " << indices.size () << " != values.size () = "
3594  << values.size () << suffix);
3595  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3596  (static_cast<size_t> (indices.size ()) !=
3597  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3598  "At the end of this method, for local row " << localRow << ", "
3599  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3600  << rowinfo.numEntries << suffix);
3601  const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3602  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3603  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3604  "of this method, for local row " << localRow << ", rowinfo.numEntries = "
3605  << rowinfo.numEntries << " != getNumEntriesInLocalRow(localRow) = " <<
3606  expectedNumEntries << suffix);
3607 #endif // HAVE_TPETRA_DEBUG
3608  }
3609 
3610  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3611  LocalOrdinal
3613  getLocalRowView (const LocalOrdinal lclRow,
3614  LocalOrdinal& numEnt,
3615  const impl_scalar_type*& val,
3616  const LocalOrdinal*& ind) const
3617  {
3618  typedef LocalOrdinal LO;
3619 
3620  // Don't call getCrsGraph(), because that modfies an RCP reference
3621  // count, which is not thread safe. Checking whether an RCP is
3622  // null does NOT modify its reference count, and is therefore
3623  // thread safe. Note that isGloballyIndexed() calls
3624  // getCrsGraph(), so we have to go to the graph directly.
3625  if (staticGraph_.is_null () || staticGraph_->isGloballyIndexed ()) {
3626  return Tpetra::Details::OrdinalTraits<LO>::invalid ();
3627  }
3628  else {
3629  const RowInfo rowInfo = staticGraph_->getRowInfo (lclRow);
3630  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
3631  numEnt = 0; // no valid entries in this row on the calling process
3632  val = nullptr;
3633  ind = nullptr;
3634  // First argument (lclRow) invalid, so make 1 the error code.
3635  return static_cast<LO> (1);
3636  }
3637  else {
3638  numEnt = static_cast<LO> (rowInfo.numEntries);
3639  auto lclColInds = staticGraph_->getLocalKokkosRowView (rowInfo);
3640  ind = lclColInds.data (); // FIXME (mfh 18 Jul 2016) UVM
3641  const LO err = this->getViewRawConst (val, numEnt, rowInfo);
3642  return err;
3643  }
3644  }
3645  }
3646 
3647  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3648  LocalOrdinal
3650  getLocalRowViewRaw (const LocalOrdinal lclRow,
3651  LocalOrdinal& numEnt,
3652  const LocalOrdinal*& lclColInds,
3653  const Scalar*& vals) const
3654  {
3655  const impl_scalar_type* vals_ist = nullptr;
3656  const LocalOrdinal errCode =
3657  this->getLocalRowView (lclRow, numEnt, vals_ist, lclColInds);
3658  vals = reinterpret_cast<const Scalar*> (vals_ist);
3659  return errCode;
3660  }
3661 
3662  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3663  void
3665  getGlobalRowView (GlobalOrdinal globalRow,
3666  Teuchos::ArrayView<const GlobalOrdinal>& indices,
3667  Teuchos::ArrayView<const Scalar>& values) const
3668  {
3669  using Teuchos::ArrayView;
3670  using Teuchos::av_reinterpret_cast;
3671  typedef GlobalOrdinal GO;
3672  const char tfecfFuncName[] = "getGlobalRowView: ";
3673 
3674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3675  isLocallyIndexed (), std::runtime_error,
3676  "The matrix is locally indexed, so we cannot return a view of the row "
3677  "with global column indices. Use getGlobalRowCopy() instead.");
3678  indices = Teuchos::null;
3679  values = Teuchos::null;
3680  const RowInfo rowinfo =
3681  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3682  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3683  rowinfo.numEntries > 0) {
3684  ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
3685  ArrayView<const Scalar> valTmp =
3686  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
3687 #ifdef HAVE_TPETRA_DEBUG
3688  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3689  (static_cast<size_t> (indTmp.size ()) < rowinfo.numEntries ||
3690  static_cast<size_t> (valTmp.size ()) < rowinfo.numEntries,
3691  std::logic_error, std::endl << "rowinfo.numEntries not accurate. "
3692  << std::endl << "indTmp.size() = " << indTmp.size ()
3693  << ", valTmp.size() = " << valTmp.size ()
3694  << ", rowinfo.numEntries = " << rowinfo.numEntries << ".");
3695 #endif // HAVE_TPETRA_DEBUG
3696  indices = indTmp (0, rowinfo.numEntries);
3697  values = valTmp (0, rowinfo.numEntries);
3698  }
3699 
3700 #ifdef HAVE_TPETRA_DEBUG
3701  const char suffix[] = ". This should never happen. Please report this "
3702  "bug to the Tpetra developers.";
3703  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3704  (static_cast<size_t> (indices.size ()) !=
3705  static_cast<size_t> (values.size ()), std::logic_error,
3706  "At the end of this method, for global row " << globalRow << ", "
3707  "indices.size() = " << indices.size () << " != values.size () = "
3708  << values.size () << suffix);
3709  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3710  (static_cast<size_t> (indices.size ()) !=
3711  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3712  "At the end of this method, for global row " << globalRow << ", "
3713  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3714  << rowinfo.numEntries << suffix);
3715  const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3716  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3717  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3718  "of this method, for global row " << globalRow << ", rowinfo.numEntries "
3719  "= " << rowinfo.numEntries << " != getNumEntriesInGlobalRow(globalRow) ="
3720  " " << expectedNumEntries << suffix);
3721 #endif // HAVE_TPETRA_DEBUG
3722  }
3723 
3724  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3725  void
3727  scale (const Scalar& alpha)
3728  {
3729  typedef LocalOrdinal LO;
3730  const char tfecfFuncName[] = "scale: ";
3731  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3732 
3733  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3734  ! isFillActive (), std::runtime_error,
3735  "Fill must be active before you may call this method. "
3736  "Please call resumeFill() to make fill active.");
3737 
3738  const size_t nlrs = staticGraph_->getNodeNumRows ();
3739  const size_t numEntries = staticGraph_->getNodeNumEntries ();
3740  if (! staticGraph_->indicesAreAllocated () ||
3741  nlrs == 0 || numEntries == 0) {
3742  // do nothing
3743  }
3744  else {
3745  auto lclMat = this->getLocalMatrix ();
3746 
3747  const LO lclNumRows = lclMat.numRows ();
3748  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3749  auto row_i = lclMat.row (lclRow);
3750  for (LO k = 0; k < row_i.length; ++k) {
3751  // FIXME (mfh 02 Jan 2015) This assumes CUDA UVM.
3752  row_i.value (k) *= theAlpha;
3753  }
3754  }
3755  }
3756  }
3757 
3758  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3759  void
3761  setAllToScalar (const Scalar& alpha)
3762  {
3763  const char tfecfFuncName[] = "setAllToScalar: ";
3764  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3765  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3766  ! isFillActive (), std::runtime_error,
3767  "Fill must be active before you may call this method. "
3768  "Please call resumeFill() to make fill active.");
3769 
3770  // replace all values in the matrix
3771  // it is easiest to replace all allocated values, instead of replacing only the ones with valid entries
3772  // however, if there are no valid entries, we can short-circuit
3773  // furthermore, if the values aren't allocated, we can short-circuit (no entry have been inserted so far)
3774  const size_t numEntries = staticGraph_->getNodeNumEntries();
3775  if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3776  // do nothing
3777  }
3778  else {
3779  // FIXME (mfh 24 Dec 2014) Once CrsMatrix implements DualView
3780  // semantics, this would be the place to mark memory as
3781  // modified.
3782  Kokkos::deep_copy (k_values1D_, theAlpha);
3783  }
3784  }
3785 
3786  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3787  void
3789  setAllValues (const typename local_matrix_type::row_map_type& rowPointers,
3790  const typename local_graph_type::entries_type::non_const_type& columnIndices,
3791  const typename local_matrix_type::values_type& values)
3792  {
3793  const char tfecfFuncName[] = "setAllValues: ";
3794  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3795  (columnIndices.size () != values.size (), std::invalid_argument,
3796  "columnIndices.size() = " << columnIndices.size () << " != values.size()"
3797  " = " << values.size () << ".");
3798  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3799  (myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
3800 
3801  try {
3802  myGraph_->setAllIndices (rowPointers, columnIndices);
3803  }
3804  catch (std::exception &e) {
3805  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3806  (true, std::runtime_error, "myGraph_->setAllIndices() threw an "
3807  "exception: " << e.what ());
3808  }
3809  // Make sure that myGraph_ now has a local graph. It may not be
3810  // fillComplete yet, so it's important to check. We don't care
3811  // whether setAllIndices() did a shallow copy or a deep copy, so a
3812  // good way to check is to compare dimensions.
3813  auto lclGraph = myGraph_->getLocalGraph ();
3814  const size_t numEnt = lclGraph.entries.extent (0);
3815  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3816  (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3817  numEnt != static_cast<size_t> (columnIndices.extent (0)),
3818  std::logic_error, "myGraph_->setAllIndices() did not correctly create "
3819  "local graph. Please report this bug to the Tpetra developers.");
3820 
3821  const size_t numCols = myGraph_->getColMap ()->getNodeNumElements ();
3822 
3823  auto lclMat = std::make_shared<local_matrix_type>
3824  ("Tpetra::CrsMatrix::lclMatrix_", numCols, values, lclGraph);
3825  lclMatrix_ = std::make_shared<local_multiply_op_type> (lclMat);
3826 
3827  // FIXME (22 Jun 2016) I would very much like to get rid of
3828  // k_values1D_ at some point. I find it confusing to have all
3829  // these extra references lying around.
3830  k_values1D_ = lclMat->values;
3831 
3832  // Storage MUST be packed, since the interface doesn't give any
3833  // way to indicate any extra space at the end of each row.
3834  this->storageStatus_ = Details::STORAGE_1D_PACKED;
3835 
3836  checkInternalState ();
3837  }
3838 
3839  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3840  void
3842  setAllValues (const Teuchos::ArrayRCP<size_t>& ptr,
3843  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3844  const Teuchos::ArrayRCP<Scalar>& val)
3845  {
3846  using Kokkos::Compat::getKokkosViewDeepCopy;
3847  using Teuchos::ArrayRCP;
3848  using Teuchos::av_reinterpret_cast;
3849  typedef device_type DT;
3850  typedef impl_scalar_type IST;
3851  typedef typename local_matrix_type::row_map_type row_map_type;
3852  //typedef typename row_map_type::non_const_value_type row_offset_type;
3853  const char tfecfFuncName[] = "setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3854 
3855  // The row offset type may depend on the execution space. It may
3856  // not necessarily be size_t. If it's not, we need to make a deep
3857  // copy. We need to make a deep copy anyway so that Kokkos can
3858  // own the memory. Regardless, ptrIn gets the copy.
3859  typename row_map_type::non_const_type ptrNative ("ptr", ptr.size ());
3860  Kokkos::View<const size_t*,
3861  typename row_map_type::array_layout,
3862  Kokkos::HostSpace,
3863  Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3864  ::Tpetra::Details::copyOffsets (ptrNative, ptrSizeT);
3865 
3866  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3867  (ptrNative.extent (0) != ptrSizeT.extent (0),
3868  std::logic_error, "ptrNative.extent(0) = " <<
3869  ptrNative.extent (0) << " != ptrSizeT.extent(0) = "
3870  << ptrSizeT.extent (0) << ". Please report this bug to the "
3871  "Tpetra developers.");
3872 
3873  auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3874  auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3875  this->setAllValues (ptrNative, indIn, valIn);
3876  }
3877 
3878  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3879  void
3881  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
3882  {
3883  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
3884  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3885  (staticGraph_.is_null (), std::runtime_error, "The matrix has no graph.");
3886 
3887  // mfh 11 May 2016: We plan to deprecate the ArrayRCP version of
3888  // this method in CrsGraph too, so don't call it (otherwise build
3889  // warnings will show up and annoy users). Instead, copy results
3890  // in and out, if the memory space requires it.
3891 
3892  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
3893  if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3894  offsets.resize (lclNumRows);
3895  }
3896 
3897  // The input ArrayRCP must always be a host pointer. Thus, if
3898  // device_type::memory_space is Kokkos::HostSpace, it's OK for us
3899  // to write to that allocation directly as a Kokkos::View.
3900  typedef typename device_type::memory_space memory_space;
3901  if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3902  // It is always syntactically correct to assign a raw host
3903  // pointer to a device View, so this code will compile correctly
3904  // even if this branch never runs.
3905  typedef Kokkos::View<size_t*, device_type,
3906  Kokkos::MemoryUnmanaged> output_type;
3907  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3908  staticGraph_->getLocalDiagOffsets (offsetsOut);
3909  }
3910  else {
3911  Kokkos::View<size_t*, device_type> offsetsTmp ("diagOffsets", lclNumRows);
3912  staticGraph_->getLocalDiagOffsets (offsetsTmp);
3913  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
3914  Kokkos::MemoryUnmanaged> output_type;
3915  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3916  Kokkos::deep_copy (offsetsOut, offsetsTmp);
3917  }
3918  }
3919 
// One-argument getLocalDiagCopy: fills the output vector `diag` with a
// copy of the matrix's local diagonal, without using precomputed
// diagonal offsets.
//
// Requires a graph and a column Map (std::runtime_error otherwise).
// Returns without doing anything on processes whose row Map or
// communicator is null.
//
// NOTE(review): the declarator lines of this definition (class
// qualifier, method name, and the declaration of `diag`) are not
// present in this listing, and neither are the lines numbered 3969 and
// 3975 in the body.  `diag` is presumably a Tpetra Vector (the error
// message below calls it "the input Vector") -- confirm against the
// class declaration.
3920  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3921  void
3924  {
3925  using Teuchos::ArrayRCP;
3926  using Teuchos::ArrayView;
3927  using Teuchos::av_reinterpret_cast;
3928  const char tfecfFuncName[] = "getLocalDiagCopy (1-arg): ";
3929  typedef local_ordinal_type LO;
3930 
3931 
3932  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3933  staticGraph_.is_null (), std::runtime_error,
3934  "This method requires that the matrix have a graph.");
3935  auto rowMapPtr = this->getRowMap ();
3936  if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3937  // Processes on which the row Map or its communicator is null
3938  // don't participate. Users shouldn't even call this method on
3939  // those processes.
3940  return;
3941  }
3942  auto colMapPtr = this->getColMap ();
3943  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3944  (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3945  "This method requires that the matrix have a column Map.");
3946  const map_type& rowMap = * rowMapPtr;
3947  const map_type& colMap = * colMapPtr;
3948  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3949 
3950 #ifdef HAVE_TPETRA_DEBUG
3951  // isCompatible() requires an all-reduce, and thus this check
3952  // should only be done in debug mode.
3953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3954  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3955  "The input Vector's Map must be compatible with the CrsMatrix's row "
3956  "Map. You may check this by using Map's isCompatible method: "
3957  "diag.getMap ()->isCompatible (A.getRowMap ());");
3958 #endif // HAVE_TPETRA_DEBUG
3959 
// Fill-complete case: operate on diag's device_type local view and the
// local matrix.  The return value of the helper is discarded.
3960  if (this->isFillComplete ()) {
3961  diag.template modify<device_type> ();
3962  const auto D_lcl = diag.template getLocalView<device_type> ();
3963  // 1-D subview of the first (and only) column of D_lcl.
3964  const auto D_lcl_1d =
3965  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3966 
3967  const auto lclRowMap = rowMap.getLocalMap ();
3968  const auto lclColMap = colMap.getLocalMap ();
3970  (void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
3971  lclColMap,
3972  lclMatrix_->getLocalMatrix ());
3973  }
// Not fill-complete: delegate to a helper that takes the whole vector
// and the matrix itself.  Its return value is discarded as well.
3974  else {
3976  (void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
3977  }
3978  }
3979 
// getLocalDiagCopy overload that takes precomputed diagonal offsets as
// an unmanaged Kokkos::View on device_type.  Fills the first (and
// only) column of diag's device_type local view by calling
// KokkosSparse::getDiagCopy with those offsets and the local matrix.
//
// In a debug build (HAVE_TPETRA_DEBUG), throws std::runtime_error if
// diag's Map is not compatible with the matrix's row Map.
//
// NOTE(review): the declarator lines of this definition (class
// qualifier, method name, and the declaration of `diag`) are not
// present in this listing.  `diag` is presumably a Tpetra Vector (the
// error message below calls it "the input Vector") -- confirm against
// the class declaration.
3980  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
3981  void
3984  const Kokkos::View<const size_t*, device_type,
3985  Kokkos::MemoryUnmanaged>& offsets) const
3986  {
3987  typedef LocalOrdinal LO;
3988 
3989 #ifdef HAVE_TPETRA_DEBUG
3990  const char tfecfFuncName[] = "getLocalDiagCopy: ";
3991  const map_type& rowMap = * (this->getRowMap ());
3992  // isCompatible() requires an all-reduce, and thus this check
3993  // should only be done in debug mode.
3994  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3995  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3996  "The input Vector's Map must be compatible with (in the sense of Map::"
3997  "isCompatible) the CrsMatrix's row Map.");
3998 #endif // HAVE_TPETRA_DEBUG
3999 
4000  // For now, we fill the Vector on the host and sync to device.
4001  // Later, we may write a parallel kernel that works entirely on
4002  // device.
4003  //
4004  // NOTE (mfh 21 Jan 2016): The host kernel here assumes UVM. Once
4005  // we write a device kernel, it will not need to assume UVM.
4006 
4007  diag.template modify<device_type> ();
4008  auto D_lcl = diag.template getLocalView<device_type> ();
4009  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4010  // Get 1-D subview of the first (and only) column of D_lcl.
4011  auto D_lcl_1d =
4012  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
4013 
4014  KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
4015  lclMatrix_->getLocalMatrix ());
4016  }
4017 
// getLocalDiagCopy overload taking precomputed diagonal offsets as a
// Teuchos::ArrayView (method name per tfecfFuncName below; the listing
// lines that carry the qualified name and the `diag` parameter are not
// visible here).
//
// Fills column 0 of `diag` on the host: for each local row, writes
// STS::zero() if that row's offset is the "invalid" size_t value, and
// otherwise the row's value at that offset.  Afterwards calls
// diag.sync_device().
//
// In HAVE_TPETRA_DEBUG builds only, throws std::runtime_error if
// diag's Map is not compatible with the matrix's row Map.
//
// NOTE(review): offsets.size() is not checked against
// getNodeNumRows() in this block; presumably the caller guarantees at
// least one offset per local row -- confirm.
4018  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4019  void
4022  const Teuchos::ArrayView<const size_t>& offsets) const
4023  {
4024  using LO = LocalOrdinal;
4025  using host_execution_space = Kokkos::DefaultHostExecutionSpace;
4026  using IST = impl_scalar_type;
4027 
4028 #ifdef HAVE_TPETRA_DEBUG
4029  const char tfecfFuncName[] = "getLocalDiagCopy: ";
4030  const map_type& rowMap = * (this->getRowMap ());
4031  // isCompatible() requires an all-reduce, and thus this check
4032  // should only be done in debug mode.
4033  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4034  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
4035  "The input Vector's Map must be compatible with (in the sense of Map::"
4036  "isCompatible) the CrsMatrix's row Map.");
4037 #endif // HAVE_TPETRA_DEBUG
4038 
4039  // See #1510. In case diag has already been marked modified on
4040  // device, we need to clear that flag, since the code below works
4041  // on host.
4042  diag.clear_sync_state ();
4043 
4044  // For now, we fill the Vector on the host and sync to device.
4045  // Later, we may write a parallel kernel that works entirely on
4046  // device.
4047  diag.modify_host ();
4048  auto lclVecHost = diag.getLocalViewHost ();
4049  // 1-D subview of the first (and only) column of lclVecHost.
4050  auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
4051 
      // Wrap the caller's offsets array in an unmanaged host View; no
      // copy is made, so `offsets` must stay valid through the loop.
4052  using host_offsets_view_type =
4053  Kokkos::View<const size_t*, Kokkos::HostSpace,
4054  Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
4055  host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
4056  // Find the diagonal entries and put them in lclVecHost1d.
4057  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4058  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
4059  const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
4060 
4061  local_matrix_type lclMat = lclMatrix_->getLocalMatrix ();
      // The lambda captures by reference ([&]); it runs on the default
      // host execution space over local rows [0, myNumRows).
4062  Kokkos::parallel_for
4063  ("Tpetra::CrsMatrix::getLocalDiagCopy",
4064  range_type (0, myNumRows),
4065  [&] (const LO lclRow) {
4066  lclVecHost1d(lclRow) = STS::zero (); // default value if no diag entry
4067  if (h_offsets[lclRow] != INV) {
4068  auto curRow = lclMat.rowConst (lclRow);
4069  lclVecHost1d(lclRow) = static_cast<IST> (curRow.value(h_offsets[lclRow]));
4070  }
4071  });
4072  diag.sync_device ();
4073  }
4074 
4075 
// leftScale (method name per tfecfFuncName below; the listing lines
// that carry the qualified name and the `x` parameter are not visible
// here).
//
// Passes the local matrix and the first column of a vector derived
// from `x` to leftScaleLocalCrsMatrix.
//
// `x` must have either the matrix's range Map or its row Map (checked
// with Map::isSameAs); otherwise std::invalid_argument is thrown.  When
// x has the range Map and the graph has an Export object, x is first
// redistributed into a temporary vector over the row Map.
//
// The matrix must be fill complete; otherwise std::runtime_error is
// thrown (see #7446).
4076  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4077  void
4080  {
4081  using ::Tpetra::Details::ProfilingRegion;
4082  using Teuchos::ArrayRCP;
4083  using Teuchos::ArrayView;
4084  using Teuchos::null;
4085  using Teuchos::RCP;
4086  using Teuchos::rcp;
4087  using Teuchos::rcpFromRef;
4088  using LO = local_ordinal_type;
4090  const char tfecfFuncName[] = "leftScale: ";
4091 
4092  ProfilingRegion region ("Tpetra::CrsMatrix::leftScale");
4093 
      // xp ends up pointing either at x itself (non-owning, via
      // rcpFromRef) or at a temporary row-Map copy of x.
4094  RCP<const vec_type> xp;
4095  if (this->getRangeMap ()->isSameAs (* (x.getMap ()))) {
4096  // Taken from Epetra: If we have a non-trivial exporter, we must
4097  // import elements that are permuted or are on other processors.
4098  auto exporter = this->getCrsGraphRef ().getExporter ();
4099  if (exporter.get () != nullptr) {
4100  RCP<vec_type> tempVec (new vec_type (this->getRowMap ()));
4101  tempVec->doImport (x, *exporter, REPLACE); // reverse mode
4102  xp = tempVec;
4103  }
4104  else {
4105  xp = rcpFromRef (x);
4106  }
4107  }
4108  else if (this->getRowMap ()->isSameAs (* (x.getMap ()))) {
4109  xp = rcpFromRef (x);
4110  }
4111  else {
4112  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4113  (true, std::invalid_argument, "x's Map must be the same as "
4114  "either the row Map or the range Map of the CrsMatrix.");
4115  }
4116 
4117  if (this->isFillComplete()) {
4118  using dev_memory_space = typename device_type::memory_space;
      // If the vector needs a sync to device memory, do it here.  This
      // casts away const, so the caller's x may have its sync state
      // changed when xp refers to x.
4119  if (xp->template need_sync<dev_memory_space> ()) {
4120  using Teuchos::rcp_const_cast;
4121  rcp_const_cast<vec_type> (xp)->template sync<dev_memory_space> ();
4122  }
4123  auto x_lcl = xp->template getLocalView<dev_memory_space> ();
4124  auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
      // NOTE(review): the meaning of the two trailing `false`
      // arguments is defined by leftScaleLocalCrsMatrix, which is not
      // visible in this block -- check its declaration.
4126  leftScaleLocalCrsMatrix (lclMatrix_->getLocalMatrix (),
4127  x_lcl_1d, false, false);
4128  }
4129  else {
4130  // 6/2020 Disallow leftScale of non-fillComplete matrices #7446
4131  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4132  (true, std::runtime_error, "CrsMatrix::leftScale requires matrix to be"
4133  " fillComplete");
4134  }
4135  }
4136 
// rightScale (method name per tfecfFuncName below; the listing lines
// that carry the qualified name and the `x` parameter are not visible
// here).
//
// Passes the local matrix and the first column of a vector derived
// from `x` to rightScaleLocalCrsMatrix.
//
// `x` must have either the matrix's domain Map or its column Map
// (checked with Map::isSameAs); otherwise std::runtime_error is thrown.
// When x has the domain Map and the graph has an Import object, x is
// first imported into a temporary vector over the column Map.
//
// The matrix must be fill complete; otherwise std::runtime_error is
// thrown (see #7446).
4137  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4138  void
4141  {
4142  using ::Tpetra::Details::ProfilingRegion;
4143  using Teuchos::ArrayRCP;
4144  using Teuchos::ArrayView;
4145  using Teuchos::null;
4146  using Teuchos::RCP;
4147  using Teuchos::rcp;
4148  using Teuchos::rcpFromRef;
4149  using LO = local_ordinal_type;
4151  const char tfecfFuncName[] = "rightScale: ";
4152 
4153  ProfilingRegion region ("Tpetra::CrsMatrix::rightScale");
4154 
      // xp ends up pointing either at x itself (non-owning, via
      // rcpFromRef) or at a temporary column-Map copy of x.
4155  RCP<const vec_type> xp;
4156  if (this->getDomainMap ()->isSameAs (* (x.getMap ()))) {
4157  // Taken from Epetra: If we have a non-trivial importer, we must
4158  // import elements that are permuted or are on other processors.
4159  auto importer = this->getCrsGraphRef ().getImporter ();
4160  if (importer.get () != nullptr) {
4161  RCP<vec_type> tempVec (new vec_type (this->getColMap ()));
4162  tempVec->doImport (x, *importer, REPLACE);
4163  xp = tempVec;
4164  }
4165  else {
4166  xp = rcpFromRef (x);
4167  }
4168  }
4169  else if (this->getColMap ()->isSameAs (* (x.getMap ()))) {
4170  xp = rcpFromRef (x);
4171  } else {
4172  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4173  (true, std::runtime_error, "x's Map must be the same as "
4174  "either the domain Map or the column Map of the CrsMatrix.");
4175  }
4176 
4177  if (this->isFillComplete()) {
4178  using dev_memory_space = typename device_type::memory_space;
      // If the vector needs a sync to device memory, do it here.  This
      // casts away const, so the caller's x may have its sync state
      // changed when xp refers to x.
4179  if (xp->template need_sync<dev_memory_space> ()) {
4180  using Teuchos::rcp_const_cast;
4181  rcp_const_cast<vec_type> (xp)->template sync<dev_memory_space> ();
4182  }
4183  auto x_lcl = xp->template getLocalView<dev_memory_space> ();
4184  auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
      // NOTE(review): the meaning of the two trailing `false`
      // arguments is defined by rightScaleLocalCrsMatrix, which is not
      // visible in this block -- check its declaration.
4186  rightScaleLocalCrsMatrix (lclMatrix_->getLocalMatrix (),
4187  x_lcl_1d, false, false);
4188  }
4189  else {
4190  // 6/2020 Disallow rightScale of non-fillComplete matrices #7446
4191  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4192  (true, std::runtime_error, "CrsMatrix::rightScale requires matrix to be"
4193  " fillComplete");
4194  }
4195  }
4196 
// Frobenius norm of the matrix (presumably getFrobeniusNorm(); the
// listing lines that carry the return type and qualified name are not
// visible here).
//
// Returns the cached value frobNorm_ unless it equals -1, which marks
// the cache as invalid.  In that case, each process sums |val|^2 over
// its local entries, the sums are combined with a REDUCE_SUM
// all-reduce over getComm(), and the square root of the total is
// returned.  The result is written back to frobNorm_ only when the
// matrix is fill complete.
//
// Note that the all-reduce happens only on the cache-miss path.
4197  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4201  {
4202  using Teuchos::ArrayView;
4203  using Teuchos::outArg;
4204  using Teuchos::REDUCE_SUM;
4205  using Teuchos::reduceAll;
4206  typedef typename Teuchos::ArrayRCP<const impl_scalar_type>::size_type size_type;
4207 
4208  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
4209  // local part of this computation. It could make sense to put
4210  // this operation in the Kokkos::CrsMatrix.
4211 
4212  // check the cache first
4213  mag_type frobNorm = frobNorm_;
4214  if (frobNorm == -STM::one ()) {
4215  mag_type mySum = STM::zero ();
4216  if (getNodeNumEntries() > 0) {
4217  if (isStorageOptimized ()) {
4218  // "Optimized" storage is packed storage. That means we can
4219  // iterate in one pass through the 1-D values array.
4220  const size_type numEntries =
4221  static_cast<size_type> (getNodeNumEntries ());
4222  for (size_type k = 0; k < numEntries; ++k) {
4223  // FIXME (mfh 05 Aug 2014) This assumes UVM.
4224  const impl_scalar_type val = k_values1D_(k);
4225  // Note (etp 06 Jan 2015) We need abs() here for composite types
4226  // (in general, if mag_type is on the left-hand-side, we need
4227  // abs() on the right-hand-side)
4228  const mag_type val_abs = STS::abs (val);
4229  mySum += val_abs * val_abs;
4230  }
4231  }
4232  else {
      // Storage is not packed: visit each local row and only its
      // first rowInfo.numEntries values.
4233  const LocalOrdinal numRows =
4234  static_cast<LocalOrdinal> (this->getNodeNumRows ());
4235  for (LocalOrdinal r = 0; r < numRows; ++r) {
4236  const RowInfo rowInfo = myGraph_->getRowInfo (r);
4237  const size_type numEntries =
4238  static_cast<size_type> (rowInfo.numEntries);
4239  ArrayView<const impl_scalar_type> A_r =
4240  this->getView (rowInfo).view (0, numEntries);
4241  for (size_type k = 0; k < numEntries; ++k) {
4242  const impl_scalar_type val = A_r[k];
4243  const mag_type val_abs = STS::abs (val);
4244  mySum += val_abs * val_abs;
4245  }
4246  }
4247  }
4248  }
4249  mag_type totalSum = STM::zero ();
4250  reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
4251  mySum, outArg (totalSum));
4252  frobNorm = STM::sqrt (totalSum);
4253  }
4254  if (isFillComplete ()) {
4255  // Only cache the result if the matrix is fill complete.
4256  // Otherwise, the values might still change. resumeFill clears
4257  // the cache.
4258  frobNorm_ = frobNorm;
4259  }
4260  return frobNorm;
4261  }
4262 
// Replace the matrix's column Map by forwarding newColMap to the owned
// graph's replaceColMap.
//
// Throws std::runtime_error if myGraph_ is null, i.e. the matrix does
// not own a modifiable graph.
4263  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4264  void
4266  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4267  {
4268  const char tfecfFuncName[] = "replaceColMap: ";
4269  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
4270  // Then replacing the column Map might mean that we need to
4271  // reindex the column indices.
4272  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4273  myGraph_.is_null (), std::runtime_error,
4274  "This method does not work if the matrix has a const graph. The whole "
4275  "idea of a const graph is that you are not allowed to change it, but "
4276  "this method necessarily must modify the graph, since the graph owns "
4277  "the matrix's column Map.");
4278  myGraph_->replaceColMap (newColMap);
4279  }
4280 
// reindexColumns (method name per tfecfFuncName below; the listing
// lines that carry the qualified name and the `graph` parameter are not
// visible here).
//
// Reindexes the columns of a graph against newColMap / newImport, and
// optionally re-sorts each row's column indices together with this
// matrix's values.
//
// The graph operated on is *graph when `graph` is non-null, otherwise
// the matrix's own myGraph_.  Throws std::invalid_argument when `graph`
// is null and myGraph_ is null as well.
//
// The graph's own sort is disabled (sortGraph == false) so that
// indices and values can be sorted jointly here; this happens only if
// sortEachRow is true and the graph is locally indexed and not already
// sorted.
4281  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4282  void
4285  const Teuchos::RCP<const map_type>& newColMap,
4286  const Teuchos::RCP<const import_type>& newImport,
4287  const bool sortEachRow)
4288  {
4289  const char tfecfFuncName[] = "reindexColumns: ";
4290  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4291  graph == nullptr && myGraph_.is_null (), std::invalid_argument,
4292  "The input graph is null, but the matrix does not own its graph.");
4293 
4294  crs_graph_type& theGraph = (graph == nullptr) ? *myGraph_ : *graph;
4295  const bool sortGraph = false; // we'll sort graph & matrix together below
4296  theGraph.reindexColumns (newColMap, newImport, sortGraph);
4297  if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
4298  const LocalOrdinal lclNumRows =
4299  static_cast<LocalOrdinal> (theGraph.getNodeNumRows ());
4300  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
4301  const RowInfo rowInfo = theGraph.getRowInfo (row);
4302  auto lclColInds = theGraph.getLocalKokkosRowViewNonConst (rowInfo);
4303  auto vals = this->getRowViewNonConst (rowInfo);
4304  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least for
4305  // lclColInds, if not also for values.
4306  sort2 (lclColInds.data (),
4307  lclColInds.data () + rowInfo.numEntries,
4308  vals.data ());
4309  }
      // Record on the graph that its rows are now sorted.
4310  theGraph.indicesAreSorted_ = true;
4311  }
4312  }
4313 
// Replace the domain Map and Import object by forwarding both
// arguments to the owned graph's replaceDomainMapAndImporter.
//
// Throws std::runtime_error if myGraph_ is null, i.e. the matrix does
// not own a modifiable graph.
4314  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4315  void
4317  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4318  Teuchos::RCP<const import_type>& newImporter)
4319  {
4320  const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
4321  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4322  myGraph_.is_null (), std::runtime_error,
4323  "This method does not work if the matrix has a const graph. The whole "
4324  "idea of a const graph is that you are not allowed to change it, but this"
4325  " method necessarily must modify the graph, since the graph owns the "
4326  "matrix's domain Map and Import objects.");
4327  myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
4328  }
4329 
// Append (column index, value) pairs for global row `globalRow` to the
// nonlocals_ container, creating that row's entry if it does not exist
// yet.  Entries are appended as given; nothing is merged or sorted
// here.
//
// NOTE(review): exactly indices.size() pairs are appended, and
// values[k] is read for every k in that range; values.size() is not
// checked in this block -- presumably the caller guarantees
// values.size() >= indices.size().
4330  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4331  void
4333  insertNonownedGlobalValues (const GlobalOrdinal globalRow,
4334  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4335  const Teuchos::ArrayView<const Scalar>& values)
4336  {
4337  using Teuchos::Array;
4338  typedef GlobalOrdinal GO;
4339  typedef typename Array<GO>::size_type size_type;
4340 
4341  const size_type numToInsert = indices.size ();
4342  // Add the new data to the list of nonlocals.
4343  // This creates the arrays if they don't exist yet.
4344  std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4345  Array<GO>& curRowInds = curRow.first;
4346  Array<Scalar>& curRowVals = curRow.second;
      // Reserve once up front so the push_back loop below does not
      // reallocate repeatedly.
4347  const size_type newCapacity = curRowInds.size () + numToInsert;
4348  curRowInds.reserve (newCapacity);
4349  curRowVals.reserve (newCapacity);
4350  for (size_type k = 0; k < numToInsert; ++k) {
4351  curRowInds.push_back (indices[k]);
4352  curRowVals.push_back (values[k]);
4353  }
4354  }
4355 
// globalAssemble (method name per tfecfFuncName below; the listing
// lines that carry the qualified name are not visible here).
//
// Communicates the entries stored in nonlocals_ to the processes that
// own the corresponding rows, adding them (Tpetra::ADD) into *this,
// and then empties nonlocals_.
//
// Outline:
//   - Throw std::runtime_error unless fill is active.
//   - All-reduce (MAX) to find out whether any process has nonlocal
//     rows; return early if none does.
//   - Sort and merge each nonlocal row's entries, build a row Map of
//     the nonlocal rows, and fill a temporary matrix with them.
//   - If the original row Map is one to one, Export directly into
//     *this; otherwise Export into a temporary one-to-one matrix and
//     Import from that into *this.
//   - All-reduce (MIN) the "locally complete" flags and throw
//     std::runtime_error if any process inserted into a row that no
//     process owns.
//
// This method performs collective communication over getComm().  Note
// that nonlocals_ has already been cleared by the time the final check
// can throw.
4356  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4357  void
4360  {
4361  using Details::Behavior;
4363  using Teuchos::Comm;
4364  using Teuchos::outArg;
4365  using Teuchos::RCP;
4366  using Teuchos::rcp;
4367  using Teuchos::REDUCE_MAX;
4368  using Teuchos::REDUCE_MIN;
4369  using Teuchos::reduceAll;
4370  using std::endl;
4372  //typedef LocalOrdinal LO;
4373  typedef GlobalOrdinal GO;
4374  typedef typename Teuchos::Array<GO>::size_type size_type;
4375  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
4376  ProfilingRegion regionGlobalAssemble ("Tpetra::CrsMatrix::globalAssemble");
4377 
4378  const bool verbose = Behavior::verbose("CrsMatrix");
4379  std::unique_ptr<std::string> prefix;
4380  if (verbose) {
4381  prefix = this->createPrefix("CrsMatrix", "globalAssemble");
4382  std::ostringstream os;
4383  os << *prefix << "nonlocals_.size()=" << nonlocals_.size()
4384  << endl;
4385  std::cerr << os.str();
4386  }
4387  RCP<const Comm<int> > comm = getComm ();
4388 
4389  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4390  (! isFillActive (), std::runtime_error, "Fill must be active before "
4391  "you may call this method.");
4392 
4393  const size_t myNumNonlocalRows = nonlocals_.size ();
4394 
4395  // If no processes have nonlocal rows, then we don't have to do
4396  // anything. Checking this is probably cheaper than constructing
4397  // the Map of nonlocal rows (see below) and noticing that it has
4398  // zero global entries.
4399  {
4400  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4401  int someoneHasNonlocalRows = 0;
4402  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4403  outArg (someoneHasNonlocalRows));
4404  if (someoneHasNonlocalRows == 0) {
4405  return; // no process has nonlocal rows, so nothing to do
4406  }
4407  }
4408 
4409  // 1. Create a list of the "nonlocal" rows on each process. This
4410  // requires iterating over nonlocals_, so while we do this,
4411  // deduplicate the entries and get a count for each nonlocal
4412  // row on this process.
4413  // 2. Construct a new row Map corresponding to those rows. This
4414  // Map is likely overlapping. We know that the Map is not
4415  // empty on all processes, because the above all-reduce and
4416  // return exclude that case.
4417 
4418  RCP<const map_type> nonlocalRowMap;
4419  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
4420  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4421  {
4422  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4423  size_type curPos = 0;
4424  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4425  ++mapIter, ++curPos) {
4426  myNonlocalGblRows[curPos] = mapIter->first;
4427  // Get the values and column indices by reference, since we
4428  // intend to change them in place (that's what "erase" does).
4429  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4430  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4431 
4432  // Sort both arrays jointly, using the column indices as keys,
4433  // then merge them jointly. "Merge" here adds values
4434  // corresponding to the same column indices. The first 2 args
4435  // of merge2 are output arguments that work just like the
4436  // return value of std::unique.
4437  sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4438  typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4439  typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4440  merge2 (gblCols_newEnd, vals_newEnd,
4441  gblCols.begin (), gblCols.end (),
4442  vals.begin (), vals.end ());
4443  gblCols.erase (gblCols_newEnd, gblCols.end ());
4444  vals.erase (vals_newEnd, vals.end ());
4445  numEntPerNonlocalRow[curPos] = gblCols.size ();
4446  }
4447 
4448  // Currently, Map requires that its indexBase be the global min
4449  // of all its global indices. Map won't compute this for us, so
4450  // we must do it. If our process has no nonlocal rows, set the
4451  // "min" to the max possible GO value. This ensures that if
4452  // some process has at least one nonlocal row, then it will pick
4453  // that up as the min. We know that at least one process has a
4454  // nonlocal row, since the all-reduce and return at the top of
4455  // this method excluded that case.
4456  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4457  {
4458  auto iter = std::min_element (myNonlocalGblRows.begin (),
4459  myNonlocalGblRows.end ());
4460  if (iter != myNonlocalGblRows.end ()) {
4461  myMinNonlocalGblRow = *iter;
4462  }
4463  }
4464  GO gblMinNonlocalGblRow = 0;
4465  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4466  outArg (gblMinNonlocalGblRow));
4467  const GO indexBase = gblMinNonlocalGblRow;
4468  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4469  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4470  }
4471 
4472  // 3. Use the values and column indices for each nonlocal row, as
4473  // stored in nonlocals_, to construct a CrsMatrix corresponding
4474  // to nonlocal rows. We may use StaticProfile, since we have
4475  // exact counts of the number of entries in each nonlocal row.
4476 
4477  if (verbose) {
4478  std::ostringstream os;
4479  os << *prefix << "Create nonlocal matrix" << endl;
4480  std::cerr << os.str();
4481  }
4482  RCP<crs_matrix_type> nonlocalMatrix =
4483  rcp (new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow (),
4484  StaticProfile));
4485  {
4486  size_type curPos = 0;
4487  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4488  ++mapIter, ++curPos) {
4489  const GO gblRow = mapIter->first;
4490  // Get values & column indices by ref, just to avoid copy.
4491  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4492  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4493  //const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
4494  nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4495  }
4496  }
4497  // There's no need to fill-complete the nonlocals matrix.
4498  // We just use it as a temporary container for the Export.
4499 
4500  // 4. If the original row Map is one to one, then we can Export
4501  // directly from nonlocalMatrix into this. Otherwise, we have
4502  // to create a temporary matrix with a one-to-one row Map,
4503  // Export into that, then Import from the temporary matrix into
4504  // *this.
4505 
4506  auto origRowMap = this->getRowMap ();
4507  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4508 
4509  int isLocallyComplete = 1; // true by default
4510 
4511  if (origRowMapIsOneToOne) {
4512  if (verbose) {
4513  std::ostringstream os;
4514  os << *prefix << "Original row Map is 1-to-1" << endl;
4515  std::cerr << os.str();
4516  }
4517  export_type exportToOrig (nonlocalRowMap, origRowMap);
4518  if (! exportToOrig.isLocallyComplete ()) {
4519  isLocallyComplete = 0;
4520  }
4521  if (verbose) {
4522  std::ostringstream os;
4523  os << *prefix << "doExport from nonlocalMatrix" << endl;
4524  std::cerr << os.str();
4525  }
4526  this->doExport (*nonlocalMatrix, exportToOrig, Tpetra::ADD);
4527  // We're done at this point!
4528  }
4529  else {
4530  if (verbose) {
4531  std::ostringstream os;
4532  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
4533  std::cerr << os.str();
4534  }
4535  // If you ask a Map whether it is one to one, it does some
4536  // communication and stashes intermediate results for later use
4537  // by createOneToOne. Thus, calling createOneToOne doesn't cost
4538  // much more than the original cost of calling isOneToOne.
4539  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
4540  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4541  if (! exportToOneToOne.isLocallyComplete ()) {
4542  isLocallyComplete = 0;
4543  }
4544 
4545  // Create a temporary matrix with the one-to-one row Map.
4546  //
4547  // TODO (mfh 09 Sep 2016, 12 Sep 2016) Estimate # entries in
4548  // each row, to avoid reallocation during the Export operation.
4549  if (verbose) {
4550  std::ostringstream os;
4551  os << *prefix << "Create & doExport into 1-to-1 matrix"
4552  << endl;
4553  std::cerr << os.str();
4554  }
4555  crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4556  // Export from matrix of nonlocals into the temp one-to-one matrix.
4557  oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4558  Tpetra::ADD);
4559 
4560  // We don't need the matrix of nonlocals anymore, so get rid of
4561  // it, to keep the memory high-water mark down.
4562  if (verbose) {
4563  std::ostringstream os;
4564  os << *prefix << "Free nonlocalMatrix" << endl;
4565  std::cerr << os.str();
4566  }
4567  nonlocalMatrix = Teuchos::null;
4568 
4569  // Import from the one-to-one matrix to the original matrix.
4570  if (verbose) {
4571  std::ostringstream os;
4572  os << *prefix << "doImport from 1-to-1 matrix" << endl;
4573  std::cerr << os.str();
4574  }
4575  import_type importToOrig (oneToOneRowMap, origRowMap);
4576  this->doImport (oneToOneMatrix, importToOrig, Tpetra::ADD);
4577  }
4578 
4579  // It's safe now to clear out nonlocals_, since we've already
4580  // committed side effects to *this. The standard idiom for
4581  // clearing a Container like std::map, is to swap it with an empty
4582  // Container and let the swapped Container fall out of scope.
4583  if (verbose) {
4584  std::ostringstream os;
4585  os << *prefix << "Free nonlocals_ (std::map)" << endl;
4586  std::cerr << os.str();
4587  }
4588  decltype (nonlocals_) newNonlocals;
4589  std::swap (nonlocals_, newNonlocals);
4590 
4591  // FIXME (mfh 12 Sep 2016) I don't like this all-reduce, and I
4592  // don't like throwing an exception here. A local return value
4593  // would likely be more useful to users. However, if users find
4594  // themselves exercising nonlocal inserts often, then they are
4595  // probably novice users who need the help. See GitHub Issues
4596  // #603 and #601 (esp. the latter) for discussion.
4597 
4598  int isGloballyComplete = 0; // output argument of reduceAll
4599  reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4600  outArg (isGloballyComplete));
4601  TEUCHOS_TEST_FOR_EXCEPTION
4602  (isGloballyComplete != 1, std::runtime_error, "On at least one process, "
4603  "you called insertGlobalValues with a global row index which is not in "
4604  "the matrix's row Map on any process in its communicator.");
4605  }
4606 
// Put the matrix back into a fill-active state.
//
// If the matrix does not have a static graph, forwards `params` to
// myGraph_->resumeFill.  In all cases calls clearGlobalConstants() and
// sets fillComplete_ to false.
4607  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4608  void
4610  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
4611  {
4612  if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
4613  myGraph_->resumeFill (params);
4614  }
4615  clearGlobalConstants ();
4616  fillComplete_ = false;
4617  }
4618 
// Intentionally empty method (presumably computeGlobalConstants(), going
// by the comment in the body; the listing lines that carry the
// qualified name are not visible here).
4619  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4620  void
4623  {
4624  // This method doesn't do anything. The analogous method in
4625  // CrsGraph does actually compute something.
4626  //
4627  // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
4628  // setting it to -1), but computeGlobalConstants() does _not_
4629  // compute the Frobenius norm; this is done on demand in
4630  // getFrobeniusNorm(), and the result is cached there.
4631  }
4632 
// Returns whatever the matrix's graph reports from
// haveGlobalConstants() (presumably this is CrsMatrix's method of the
// same name; the listing lines that carry the qualified name and the
// opening brace are not visible here).
4633  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4634  bool
4637  return getCrsGraphRef ().haveGlobalConstants ();
4638  }
4639 
// Invalidates the cached Frobenius norm by setting frobNorm_ to -1
// (presumably clearGlobalConstants(), going by the comment in the body;
// the listing lines that carry the qualified name and the opening
// brace are not visible here).
4640  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4641  void
4644  // We use -1 to indicate that the Frobenius norm needs to be
4645  // recomputed, since the values might change between now and the
4646  // next fillComplete call.
4647  //
4648  // Oddly enough, clearGlobalConstants() clears frobNorm_, but
4649  // computeGlobalConstants() does _not_ compute the Frobenius norm;
4650  // this is done on demand in getFrobeniusNorm(), and the result is
4651  // cached there.
4652  frobNorm_ = -STM::one ();
4653  }
4654 
// fillComplete overload that takes only a parameter list.  It picks
// the domain and range Maps and forwards to the three-argument
// fillComplete (domainMap, rangeMap, params).
//
// If the matrix has a static graph and that graph is already fill
// complete, the graph's own domain and range Maps are used.  Otherwise
// the graph's row Map is used as both the domain and the range Map.
//
// Throws std::logic_error if getCrsGraph() returns null.
4655  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4656  void
4658  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
4659  {
4660  const char tfecfFuncName[] = "fillComplete(params): ";
4661 
4662  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4663  (this->getCrsGraph ().is_null (), std::logic_error,
4664  "getCrsGraph() returns null. This should not happen at this point. "
4665  "Please report this bug to the Tpetra developers.");
4666 
4667  const crs_graph_type& graph = this->getCrsGraphRef ();
4668  if (this->isStaticGraph () && graph.isFillComplete ()) {
4669  // If this matrix's graph is fill complete and the user did not
4670  // supply a domain or range Map, use the graph's domain and
4671  // range Maps.
4672  this->fillComplete (graph.getDomainMap (), graph.getRangeMap (), params);
4673  }
4674  else { // assume that user's row Map is the domain and range Map
4675  Teuchos::RCP<const map_type> rangeMap = graph.getRowMap ();
4676  Teuchos::RCP<const map_type> domainMap = rangeMap;
4677  this->fillComplete (domainMap, rangeMap, params);
4678  }
4679  }
4680 
4681  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4682  void
4684  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
4685  const Teuchos::RCP<const map_type>& rangeMap,
4686  const Teuchos::RCP<Teuchos::ParameterList>& params)
4687  {
4688  using Details::Behavior;
4690  using Teuchos::ArrayRCP;
4691  using Teuchos::RCP;
4692  using Teuchos::rcp;
4693  using std::endl;
4694  const char tfecfFuncName[] = "fillComplete: ";
4695  ProfilingRegion regionFillComplete
4696  ("Tpetra::CrsMatrix::fillComplete");
4697  const bool verbose = Behavior::verbose("CrsMatrix");
4698  std::unique_ptr<std::string> prefix;
4699  if (verbose) {
4700  prefix = this->createPrefix("CrsMatrix", "fillComplete(dom,ran,p)");
4701  std::ostringstream os;
4702  os << *prefix << endl;
4703  std::cerr << os.str ();
4704  }
4705  Details::ProfilingRegion region(
4706  "Tpetra::CrsMatrix::fillCompete",
4707  "fillCompete");
4708 
4709  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4710  (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4711  "Matrix fill state must be active (isFillActive() "
4712  "must be true) before you may call fillComplete().");
4713  const int numProcs = this->getComm ()->getSize ();
4714 
4715  //
4716  // Read parameters from the input ParameterList.
4717  //
4718  {
4719  Details::ProfilingRegion region_fc("Tpetra::CrsMatrix::fillCompete", "ParameterList");
4720 
4721  // If true, the caller promises that no process did nonlocal
4722  // changes since the last call to fillComplete.
4723  bool assertNoNonlocalInserts = false;
4724  // If true, makeColMap sorts remote GIDs (within each remote
4725  // process' group).
4726  bool sortGhosts = true;
4727 
4728  if (! params.is_null ()) {
4729  assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
4730  assertNoNonlocalInserts);
4731  if (params->isParameter ("sort column map ghost gids")) {
4732  sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
4733  }
4734  else if (params->isParameter ("Sort column Map ghost GIDs")) {
4735  sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
4736  }
4737  }
4738  // We also don't need to do global assembly if there is only one
4739  // process in the communicator.
4740  const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4741  // This parameter only matters if this matrix owns its graph.
4742  if (! this->myGraph_.is_null ()) {
4743  this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4744  }
4745 
4746  if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4747  if (this->hasColMap ()) { // use local indices
4748  allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4749  }
4750  else { // no column Map, so use global indices
4751  allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4752  }
4753  }
4754  // Global assemble, if we need to. This call only costs a single
4755  // all-reduce if we didn't need global assembly after all.
4756  if (needGlobalAssemble) {
4757  this->globalAssemble ();
4758  }
4759  else {
4760  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4761  (numProcs == 1 && nonlocals_.size() > 0,
4762  std::runtime_error, "Cannot have nonlocal entries on a serial run. "
4763  "An invalid entry (i.e., with row index not in the row Map) must have "
4764  "been submitted to the CrsMatrix.");
4765  }
4766  }
4767  if (this->isStaticGraph ()) {
4768  Details::ProfilingRegion region_isg("Tpetra::CrsMatrix::fillCompete", "isStaticGraph");
4769  // FIXME (mfh 14 Nov 2016) In order to fix #843, I enable the
4770  // checks below only in debug mode. It would be nicer to do a
4771  // local check, then propagate the error state in a deferred
4772  // way, whenever communication happens. That would reduce the
4773  // cost of checking, to the point where it may make sense to
4774  // enable it even in release mode.
4775 #ifdef HAVE_TPETRA_DEBUG
4776  // FIXME (mfh 18 Jun 2014) This check for correctness of the
4777  // input Maps incurs a penalty of two all-reduces for the
4778  // otherwise optimal const graph case.
4779  //
4780  // We could turn these (max) 2 all-reduces into (max) 1, by
4781  // fusing them. We could do this by adding a "locallySameAs"
4782  // method to Map, which would return one of four states:
4783  //
4784  // a. Certainly globally the same
4785  // b. Certainly globally not the same
4786  // c. Locally the same
4787  // d. Locally not the same
4788  //
4789  // The first two states don't require further communication.
4790  // The latter two states require an all-reduce to communicate
4791  // globally, but we only need one all-reduce, since we only need
4792  // to check whether at least one of the Maps is wrong.
4793  const bool domainMapsMatch =
4794  this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4795  const bool rangeMapsMatch =
4796  this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4797 
4798  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4799  (! domainMapsMatch, std::runtime_error,
4800  "The CrsMatrix's domain Map does not match the graph's domain Map. "
4801  "The graph cannot be changed because it was given to the CrsMatrix "
4802  "constructor as const. You can fix this by passing in the graph's "
4803  "domain Map and range Map to the matrix's fillComplete call.");
4804 
4805  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4806  (! rangeMapsMatch, std::runtime_error,
4807  "The CrsMatrix's range Map does not match the graph's range Map. "
4808  "The graph cannot be changed because it was given to the CrsMatrix "
4809  "constructor as const. You can fix this by passing in the graph's "
4810  "domain Map and range Map to the matrix's fillComplete call.");
4811 #endif // HAVE_TPETRA_DEBUG
4812 
4813  // The matrix does _not_ own the graph, and the graph's
4814  // structure is already fixed, so just fill the local matrix.
4815  this->fillLocalMatrix (params);
4816  }
4817  else {
4818  Details::ProfilingRegion region_insg("Tpetra::CrsMatrix::fillCompete", "isNotStaticGraph");
4819  // Set the graph's domain and range Maps. This will clear the
4820  // Import if the domain Map has changed (is a different
4821  // pointer), and the Export if the range Map has changed (is a
4822  // different pointer).
4823  this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4824 
4825  // Make the graph's column Map, if necessary.
4826  Teuchos::Array<int> remotePIDs (0);
4827  const bool mustBuildColMap = ! this->hasColMap ();
4828  if (mustBuildColMap) {
4829  this->myGraph_->makeColMap (remotePIDs);
4830  }
4831 
4832  // Make indices local, if necessary. The method won't do
4833  // anything if the graph is already locally indexed.
4834  const std::pair<size_t, std::string> makeIndicesLocalResult =
4835  this->myGraph_->makeIndicesLocal(verbose);
4836  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
4837  // the error state to makeImportExport or
4838  // computeGlobalConstants, which may do all-reduces and thus may
4839  // have the opportunity to communicate that error state.
4840  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4841  (makeIndicesLocalResult.first != 0, std::runtime_error,
4842  makeIndicesLocalResult.second);
4843 
4844  const bool sorted = this->myGraph_->isSorted ();
4845  const bool merged = this->myGraph_->isMerged ();
4846  this->sortAndMergeIndicesAndValues (sorted, merged);
4847 
4848  // Make Import and Export objects, if they haven't been made
4849  // already. If we made a column Map above, reuse information
4850  // from that process to avoid communiation in the Import setup.
4851  this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4852 
4853  // The matrix _does_ own the graph, so fill the local graph at
4854  // the same time as the local matrix.
4855  this->fillLocalGraphAndMatrix (params);
4856 
4857  const bool callGraphComputeGlobalConstants = params.get () == nullptr ||
4858  params->get ("compute global constants", true);
4859  if (callGraphComputeGlobalConstants) {
4860  this->myGraph_->computeGlobalConstants ();
4861  }
4862  else {
4863  this->myGraph_->computeLocalConstants ();
4864  }
4865  this->myGraph_->fillComplete_ = true;
4866  this->myGraph_->checkInternalState ();
4867  }
4868 
4869  {
4870  Details::ProfilingRegion region_ccgc(
4871  "Tpetra::CrsMatrix::fillCompete", "callComputeGlobalConstamnts"
4872  );
4873  const bool callComputeGlobalConstants = params.get () == nullptr ||
4874  params->get ("compute global constants", true);
4875  if (callComputeGlobalConstants) {
4876  this->computeGlobalConstants ();
4877  }
4878  }
4879 
4880  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4881 
4882  this->fillComplete_ = true; // Now we're fill complete!
4883  {
4884  Details::ProfilingRegion region_cis(
4885  "Tpetra::CrsMatrix::fillCompete", "checkInternalState"
4886  );
4887  this->checkInternalState ();
4888  }
4889  }
4890 
4891  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4892  void
4894  expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
4895  const Teuchos::RCP<const map_type> & rangeMap,
4896  const Teuchos::RCP<const import_type>& importer,
4897  const Teuchos::RCP<const export_type>& exporter,
4898  const Teuchos::RCP<Teuchos::ParameterList> &params)
4899  {
4900 #ifdef HAVE_TPETRA_MMM_TIMINGS
4901  std::string label;
4902  if(!params.is_null())
4903  label = params->get("Timer Label",label);
4904  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
4905  using Teuchos::TimeMonitor;
4906 
4907  Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-all")));
4908 #endif
4909 
4910  const char tfecfFuncName[] = "expertStaticFillComplete: ";
4911  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4912  std::runtime_error, "Matrix fill state must be active (isFillActive() "
4913  "must be true) before calling fillComplete().");
4914  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4915  myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
4916 
4917  {
4918 #ifdef HAVE_TPETRA_MMM_TIMINGS
4919  Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string("eSFC-M-Graph")));
4920 #endif
4921  // We will presume globalAssemble is not needed, so we do the ESFC on the graph
4922  myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4923  }
4924 
4925  const bool callComputeGlobalConstants = params.get () == nullptr ||
4926  params->get ("compute global constants", true);
4927  if (callComputeGlobalConstants) {
4928  this->computeGlobalConstants ();
4929  }
4930 
4931  {
4932 #ifdef HAVE_TPETRA_MMM_TIMINGS
4933  TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string("eSFC-M-fLGAM")));
4934 #endif
4935  // Fill the local graph and matrix
4936  fillLocalGraphAndMatrix (params);
4937  }
4938  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4939 
4940  // Now we're fill complete!
4941  fillComplete_ = true;
4942 
4943  // Sanity checks at the end.
4944 #ifdef HAVE_TPETRA_DEBUG
4945  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4946  ": We're at the end of fillComplete(), but isFillActive() is true. "
4947  "Please report this bug to the Tpetra developers.");
4948  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4949  ": We're at the end of fillComplete(), but isFillActive() is true. "
4950  "Please report this bug to the Tpetra developers.");
4951 #endif // HAVE_TPETRA_DEBUG
4952  {
4953 #ifdef HAVE_TPETRA_MMM_TIMINGS
4954  Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS")));
4955 #endif
4956 
4957  checkInternalState();
4958  }
4959  }
4960 
// mergeRowIndicesAndValues: merge runs of equal, adjacent column indices in
// one local row, summing the corresponding values in place.
//
// Only neighbouring entries are compared, so duplicates are merged only when
// they are adjacent (sortAndMergeIndicesAndValues sorts a row before calling
// this when the graph is not already sorted).
//
// Parameters:
//   graph   - graph whose local row view is compacted and whose
//             k_numRowEntries_(rowInfo.localRow) is overwritten with the
//             merged entry count.
//   rowInfo - describes the row; numEntries bounds the scan, and in debug
//             builds allocSize is checked against both view extents.
//
// Returns: the number of duplicate entries removed
//          (rowInfo.numEntries - merged count).
//
// NOTE(review): the listing numbering skips 4963-4964, where the class
// qualifier and the function name with its first parameter (`graph`, used
// below) would be expected -- confirm against the original file.
4961  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
4962  size_t
4965  const RowInfo& rowInfo)
4966  {
4967 #ifdef HAVE_TPETRA_DEBUG
4968  const char tfecfFuncName[] = "mergeRowIndicesAndValues: ";
4969 #endif // HAVE_TPETRA_DEBUG
4970 
4971  auto rowValues = this->getRowViewNonConst (rowInfo);
4972  typedef typename std::decay<decltype (rowValues[0]) >::type value_type;
4973  value_type* rowValueIter = rowValues.data ();
4974  auto inds_view = graph.getLocalKokkosRowViewNonConst (rowInfo);
4975 
4976  // beg,end define a half-exclusive interval over which to iterate.
4977  LocalOrdinal* beg = inds_view.data ();
4978  LocalOrdinal* end = inds_view.data () + rowInfo.numEntries;
4979 
4980 #ifdef HAVE_TPETRA_DEBUG
4981  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4982  (rowInfo.allocSize != static_cast<size_t> (inds_view.extent (0)) ||
4983  rowInfo.allocSize != static_cast<size_t> (rowValues.extent (0)),
4984  std::runtime_error, "rowInfo.allocSize = " << rowInfo.allocSize
4985  << " != inds_view.extent(0) = " << inds_view.extent (0)
4986  << " || rowInfo.allocSize = " << rowInfo.allocSize
4987  << " != rowValues.extent(0) = " << rowValues.extent (0) << ".");
4988 #endif // HAVE_TPETRA_DEBUG
4989 
      // newend / vend point at the last entry kept so far (write cursors);
      // cur / vcur scan ahead over the remaining entries (read cursors).
4990  LocalOrdinal* newend = beg;
4991  if (beg != end) {
4992  LocalOrdinal* cur = beg + 1;
4993  value_type* vcur = rowValueIter + 1;
4994  value_type* vend = rowValueIter;
4996  while (cur != end) {
4997  if (*cur != *newend) {
4998  // new entry; save it
4999  ++newend;
5000  ++vend;
5001  (*newend) = (*cur);
5002  (*vend) = (*vcur);
5003  }
5004  else {
5005  // old entry; merge it
5006  //(*vend) = f (*vend, *vcur);
5007  (*vend) += *vcur;
5008  }
5009  ++cur;
5010  ++vcur;
5011  }
5012  ++newend; // one past the last entry, per typical [beg,end) semantics
5013  }
5014  const size_t mergedEntries = newend - beg;
5015  graph.k_numRowEntries_(rowInfo.localRow) = mergedEntries;
5016  const size_t numDups = rowInfo.numEntries - mergedEntries;
5017  return numDups;
5018  }
5019 
// sortAndMergeIndicesAndValues: bring every local row of the owned graph
// (and the matching values) into sorted, duplicate-free form.
//
// Parameters:
//   sorted - caller's statement that the rows are already sorted; when false
//            each row's column indices and values are sorted together.
//   merged - caller's statement that the rows have no duplicates; when false
//            each row is passed to mergeRowIndicesAndValues().
// When both are true the function does nothing.
//
// Errors (only checked when work is needed; raised through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//   std::runtime_error if isStaticGraph().
//   std::logic_error   if myGraph_ is null or isStorageOptimized().
//
// Side effects: sets graph.indicesAreSorted_ and/or graph.noRedundancies_
// to true for the work that was performed.
//
// NOTE(review): the listing numbering skips 5022, where the class qualifier
// of this out-of-line member definition would be expected -- confirm against
// the original file.
5020  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5021  void
5023  sortAndMergeIndicesAndValues (const bool sorted, const bool merged)
5024  {
5025  using ::Tpetra::Details::ProfilingRegion;
5026  typedef LocalOrdinal LO;
5027  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5028  host_execution_space;
5029  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5030  //typedef Kokkos::RangePolicy<Kokkos::Serial, LO> range_type;
5031  const char tfecfFuncName[] = "sortAndMergeIndicesAndValues: ";
5032  ProfilingRegion regionSAM ("Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
5033 
5034  if (! sorted || ! merged) {
5035  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5036  (this->isStaticGraph (), std::runtime_error, "Cannot sort or merge with "
5037  "\"static\" (const) graph, since the matrix does not own the graph.");
5038  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5039  (this->myGraph_.is_null (), std::logic_error, "myGraph_ is null, but "
5040  "this matrix claims ! isStaticGraph(). "
5041  "Please report this bug to the Tpetra developers.");
5042  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5043  (this->isStorageOptimized (), std::logic_error, "It is invalid to call "
5044  "this method if the graph's storage has already been optimized. "
5045  "Please report this bug to the Tpetra developers.");
5046 
5047  crs_graph_type& graph = * (this->myGraph_);
5048  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
      // Sum of the per-row duplicate counts; it is accumulated by the
      // reduction below but not read again in this function.
5049  size_t totalNumDups = 0;
5050  // FIXME (mfh 10 May 2017) This may assume CUDA UVM.
5051  Kokkos::parallel_reduce (range_type (0, lclNumRows),
5052  [this, &graph, sorted, merged] (const LO& lclRow, size_t& numDups) {
5053  const RowInfo rowInfo = graph.getRowInfo (lclRow);
5054  if (! sorted) {
5055  auto lclColInds = graph.getLocalKokkosRowViewNonConst (rowInfo);
5056  auto vals = this->getRowViewNonConst (rowInfo);
5057  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least
5058  // for lclColInds, if not also for values.
5059  sort2 (lclColInds.data (),
5060  lclColInds.data () + rowInfo.numEntries,
5061  vals.data ());
5062  }
      // Merging runs after the optional sort of the same row.
5063  if (! merged) {
5064  numDups += this->mergeRowIndicesAndValues (graph, rowInfo);
5065  }
5066  }, totalNumDups);
5067  if (! sorted) {
5068  graph.indicesAreSorted_ = true; // we just sorted every row
5069  }
5070  if (! merged) {
5071  graph.noRedundancies_ = true; // we just merged every row
5072  }
5073  }
5074  }
5075 
// Non-transpose sparse matrix-multivector product:
//   Y_in = beta*Y_in + alpha*A*X_in.
//
// Parameters (X_in and Y_in are declared on lines missing from this listing;
// see the note below):
//   X_in  - input multivector; imported into a column Map multivector when
//           the graph has an Import object.
//   Y_in  - output multivector, updated in place.
//   alpha - scaling of the product. alpha == 0 short-circuits to scaling
//           Y_in by beta only.
//   beta  - scaling of the previous contents of Y_in. beta == 0 means Y_in
//           is overwritten (its old contents are not read).
//
// NOTE(review): the listing numbering skips 5078-5080 and 5084, where the
// class qualifier, the function name with the X_in / Y_in parameters, and
// presumably a using-declaration for ProfilingRegion would be expected. The
// name applyNonTranspose is taken from the call in apply() -- confirm
// against the original file.
5076  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5077  void
5081  Scalar alpha,
5082  Scalar beta) const
5083  {
5085  using Teuchos::RCP;
5086  using Teuchos::rcp;
5087  using Teuchos::rcp_const_cast;
5088  using Teuchos::rcpFromRef;
5089  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5090  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
5091 
5092  // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
5093  // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
5094  // failing only for the Kokkos refactor version of Tpetra. It's a
5095  // good idea regardless to have the bypass.
5096  if (alpha == ZERO) {
5097  if (beta == ZERO) {
5098  Y_in.putScalar (ZERO);
5099  } else if (beta != ONE) {
5100  Y_in.scale (beta);
5101  }
5102  return;
5103  }
5104 
5105  // It's possible that X is a view of Y or vice versa. We don't
5106  // allow this (apply() requires that X and Y not alias one
5107  // another), but it's helpful to detect and work around this case.
5108  // We don't try to detect the more subtle cases (e.g., one is a
5109  // subview of the other, but their initial pointers differ). We
5110  // only need to do this if this matrix's Import is trivial;
5111  // otherwise, we don't actually apply the operator from X into Y.
5112 
5113  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5114  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5115 
5116  // If beta == 0, then the output MV will be overwritten; none of
5117  // its entries should be read. (Sparse BLAS semantics say that we
5118  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
5119  // This matters if we need to do an Export operation; see below.
5120  const bool Y_is_overwritten = (beta == ZERO);
5121 
5122  // We treat the case of a replicated MV output specially.
5123  const bool Y_is_replicated =
5124  (! Y_in.isDistributed () && this->getComm ()->getSize () != 1);
5125 
5126  // This is part of the special case for replicated MV output.
5127  // We'll let each process do its thing, but do an all-reduce at
5128  // the end to sum up the results. Setting beta=0 on all processes
5129  // but Proc 0 makes the math work out for the all-reduce. (This
5130  // assumes that the replicated data is correctly replicated, so
5131  // that the data are the same on all processes.)
5132  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5133  beta = ZERO;
5134  }
5135 
5136  // Temporary MV for Import operation. After the block of code
5137  // below, this will be an (Imported if necessary) column Map MV
5138  // ready to give to localApply(...).
5139  RCP<const MV> X_colMap;
5140  if (importer.is_null ()) {
5141  if (! X_in.isConstantStride ()) {
5142  // Not all sparse mat-vec kernels can handle an input MV with
5143  // nonconstant stride correctly, so we have to copy it in that
5144  // case into a constant stride MV. To make a constant stride
5145  // copy of X_in, we force creation of the column (== domain)
5146  // Map MV (if it hasn't already been created, else fetch the
5147  // cached copy). This avoids creating a new MV each time.
5148  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
5149  Tpetra::deep_copy (*X_colMapNonConst, X_in);
5150  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5151  }
5152  else {
5153  // The domain and column Maps are the same, so do the local
5154  // multiply using the domain Map input MV X_in.
5155  X_colMap = rcpFromRef (X_in);
5156  }
5157  }
5158  else { // need to Import source (multi)vector
5159  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply: Import");
5160 
5161  // We're doing an Import anyway, which will copy the relevant
5162  // elements of the domain Map MV X_in into a separate column Map
5163  // MV. Thus, we don't have to worry whether X_in is constant
5164  // stride.
5165  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
5166 
5167  // Import from the domain Map MV to the column Map MV.
5168  X_colMapNonConst->doImport (X_in, *importer, INSERT);
5169  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
5170  }
5171 
5172  // Temporary MV for doExport (if needed), or for copying a
5173  // nonconstant stride output MV into a constant stride MV. This
5174  // is null if we don't need the temporary MV, that is, if the
5175  // Export is trivial (null).
5176  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
5177 
5178  // If we have a nontrivial Export object, we must perform an
5179  // Export. In that case, the local multiply result will go into
5180  // the row Map multivector. We don't have to make a
5181  // constant-stride version of Y_in in this case, because we had to
5182  // make a constant stride Y_rowMap MV and do an Export anyway.
5183  if (! exporter.is_null ()) {
      // beta is passed as ZERO here: the beta*Y_in term is applied to
      // Y_in itself just before the Export below.
5184  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
5185  {
5186  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply: Export");
5187 
5188  // If we're overwriting the output MV Y_in completely (beta ==
5189  // 0), then make sure that it is filled with zeros before we
5190  // do the Export. Otherwise, the ADD combine mode will use
5191  // data in Y_in, which is supposed to be zero.
5192  if (Y_is_overwritten) {
5193  Y_in.putScalar (ZERO);
5194  }
5195  else {
5196  // Scale output MV by beta, so that doExport sums in the
5197  // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
5198  Y_in.scale (beta);
5199  }
5200  // Do the Export operation.
5201  Y_in.doExport (*Y_rowMap, *exporter, ADD);
5202  }
5203  }
5204  else { // Don't do an Export: row Map and range Map are the same.
5205  //
5206  // If Y_in does not have constant stride, or if the column Map
5207  // MV aliases Y_in, then we can't let the kernel write directly
5208  // to Y_in. Instead, we have to use the cached row (== range)
5209  // Map MV as temporary storage.
5210  //
5211  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5212  // the user passed in the same MultiVector for both X and Y. It
5213  // won't detect whether one MultiVector views the other. We
5214  // should also check the MultiVectors' raw data pointers.
5215  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
5216  // Force creating the MV if it hasn't been created already.
5217  // This will reuse a previously created cached MV.
5218  Y_rowMap = getRowMapMultiVector (Y_in, true);
5219 
5220  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
5221  // since we're overwriting it anyway.
5222  if (beta != ZERO) {
5223  Tpetra::deep_copy (*Y_rowMap, Y_in);
5224  }
5225  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
5226  Tpetra::deep_copy (Y_in, *Y_rowMap);
5227  }
5228  else {
5229  this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
5230  }
5231  }
5232 
5233  // If the range Map is a locally replicated Map, sum up
5234  // contributions from each process. We set beta = 0 on all
5235  // processes but Proc 0 initially, so this will handle the scaling
5236  // factor beta correctly.
5237  if (Y_is_replicated) {
5238  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply: Reduce Y");
5239  Y_in.reduce ();
5240  }
5241  }
5242 
// Transpose (or conjugate-transpose) sparse matrix-multivector product:
// Y_in is updated from X_in using the operator selected by `mode`, scaled
// by alpha, plus beta times the previous contents of Y_in.
//
// Parameters (X_in and Y_in are declared on lines missing from this listing;
// see the note below):
//   X_in  - input multivector.
//   Y_in  - output multivector, updated in place.
//   mode  - transpose mode forwarded to localApply().
//   alpha - scaling of the product; alpha == 0 short-circuits to
//           putScalar(0) (beta == 0) or scale(beta) on Y_in.
//   beta  - scaling of the previous contents of Y_in; beta == 0 means Y_in
//           is overwritten.
//
// In the transpose case the roles of the graph's communication objects are
// swapped relative to the non-transpose product: the Export object is used
// to import X_in (into exportMV_) and the Import object is used to export
// the result (from importMV_). Both cached multivectors are reallocated
// when their number of columns differs from X_in's.
//
// NOTE(review): Y_is_replicated here is just ! Y_in.isDistributed(), without
// the communicator-size test used in the non-transpose path -- confirm this
// difference is intentional.
// NOTE(review): the listing numbering skips 5245-5247 and 5252, where the
// class qualifier, the function name with the X_in / Y_in parameters, and
// presumably a using-declaration for ProfilingRegion would be expected. The
// name applyTranspose is taken from the call in apply() -- confirm against
// the original file.
5243  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5244  void
5248  const Teuchos::ETransp mode,
5249  Scalar alpha,
5250  Scalar beta) const
5251  {
5253  using Teuchos::null;
5254  using Teuchos::RCP;
5255  using Teuchos::rcp;
5256  using Teuchos::rcp_const_cast;
5257  using Teuchos::rcpFromRef;
5258  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5259 
5260  // Take shortcuts for alpha == 0.
5261  if (alpha == ZERO) {
5262  // Follow the Sparse BLAS convention by ignoring both the matrix
5263  // and X_in, in this case.
5264  if (beta == ZERO) {
5265  // Follow the Sparse BLAS convention by overwriting any Inf or
5266  // NaN values in Y_in, in this case.
5267  Y_in.putScalar (ZERO);
5268  }
5269  else {
5270  Y_in.scale (beta);
5271  }
5272  return;
5273  }
5274 
5275  const size_t numVectors = X_in.getNumVectors ();
5276 
5277  // We don't allow X_in and Y_in to alias one another. It's hard
5278  // to check this, because advanced users could create views from
5279  // raw pointers. However, if X_in and Y_in reference the same
5280  // object, we will do the user a favor by copying X into new
5281  // storage (with a warning). We only need to do this if we have
5282  // trivial importers; otherwise, we don't actually apply the
5283  // operator from X into Y.
5284  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5285  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5286  // access X indirectly, in case we need to create temporary storage
5287  RCP<const MV> X;
5288 
5289  // some parameters for below
5290  const bool Y_is_replicated = ! Y_in.isDistributed ();
5291  const bool Y_is_overwritten = (beta == ZERO);
      // Y_is_overwritten was captured above from the caller's beta; on
      // ranks > 0 of a replicated Y, beta is zeroed so the final reduce()
      // applies the beta term only once.
5292  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
5293  beta = ZERO;
5294  }
5295 
5296  // The kernels do not allow input or output with nonconstant stride.
5297  if (! X_in.isConstantStride () && importer.is_null ()) {
5298  X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
5299  } else {
5300  X = rcpFromRef (X_in); // Reference to X_in
5301  }
5302 
5303  // Set up temporary multivectors for Import and/or Export.
5304  if (importer != Teuchos::null) {
5305  if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
5306  importMV_ = null;
5307  }
5308  if (importMV_ == null) {
5309  importMV_ = rcp (new MV (this->getColMap (), numVectors));
5310  }
5311  }
5312  if (exporter != Teuchos::null) {
5313  if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
5314  exportMV_ = null;
5315  }
5316  if (exportMV_ == null) {
5317  exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
5318  }
5319  }
5320 
5321  // If we have a non-trivial exporter, we must import elements that
5322  // are permuted or are on other processors.
5323  if (! exporter.is_null ()) {
5324  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply (transpose): Import");
5325  exportMV_->doImport (X_in, *exporter, INSERT);
5326  X = exportMV_; // multiply out of exportMV_
5327  }
5328 
5329  // If we have a non-trivial importer, we must export elements that
5330  // are permuted or belong to other processors. We will compute
5331  // solution into the to-be-exported MV; get a view.
5332  if (importer != Teuchos::null) {
5333  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply (transpose): Export");
5334 
5335  // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
5336  // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
5337  // back and figure out why this helps. importMV_ SHOULD be
5338  // completely overwritten in the localApply(...) call
5339  // below, because beta == ZERO there.
5340  importMV_->putScalar (ZERO);
5341  // Do the local computation.
5342  this->localApply (*X, *importMV_, mode, alpha, ZERO);
5343  if (Y_is_overwritten) {
5344  Y_in.putScalar (ZERO);
5345  } else {
5346  Y_in.scale (beta);
5347  }
5348  Y_in.doExport (*importMV_, *importer, ADD);
5349  }
5350  // otherwise, multiply into Y
5351  else {
5352  // can't multiply in-situ; can't multiply into non-strided multivector
5353  //
5354  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
5355  // the user passed in the same MultiVector for both X and Y. It
5356  // won't detect whether one MultiVector views the other. We
5357  // should also check the MultiVectors' raw data pointers.
5358  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
5359  // Make a deep copy of Y_in, into which to write the multiply result.
5360  MV Y (Y_in, Teuchos::Copy);
5361  this->localApply (*X, Y, mode, alpha, beta);
5362  Tpetra::deep_copy (Y_in, Y);
5363  } else {
5364  this->localApply (*X, Y_in, mode, alpha, beta);
5365  }
5366  }
5367 
5368  // If the range Map is a locally replicated map, sum the
5369  // contributions from each process. (That's why we set beta=0
5370  // above for all processes but Proc 0.)
5371  if (Y_is_replicated) {
5372  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply (transpose): Reduce Y");
5373  Y_in.reduce ();
5374  }
5375  }
5376 
5377  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5378  void
5382  const Teuchos::ETransp mode,
5383  const Scalar& alpha,
5384  const Scalar& beta) const
5385  {
5387  using Teuchos::NO_TRANS;
5388  ProfilingRegion regionLocalApply ("Tpetra::CrsMatrix::localApply");
5389 
5390  auto X_lcl = X.getLocalViewDevice ();
5391  auto Y_lcl = Y.getLocalViewDevice ();
5392  // TODO (24 Jul 2019) uncomment later; this line of code wasn't
5393  // here before, so we need to test it separately before pushing.
5394  //
5395  // Y.modify_device ();
5396 
5397  const bool debug = ::Tpetra::Details::Behavior::debug ();
5398  if (debug) {
5399  const char tfecfFuncName[] = "localApply: ";
5400  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5401  (lclMatrix_.get () == nullptr, std::logic_error,
5402  "lclMatrix_ not created yet.");
5403  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5404  (X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
5405  "X.getNumVectors() = " << X.getNumVectors () << " != "
5406  "Y.getNumVectors() = " << Y.getNumVectors () << ".");
5407  const bool transpose = (mode != Teuchos::NO_TRANS);
5408  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5409  (! transpose && X.getLocalLength () !=
5410  getColMap ()->getNodeNumElements (), std::runtime_error,
5411  "NO_TRANS case: X has the wrong number of local rows. "
5412  "X.getLocalLength() = " << X.getLocalLength () << " != "
5413  "getColMap()->getNodeNumElements() = " <<
5414  getColMap ()->getNodeNumElements () << ".");
5415  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5416  (! transpose && Y.getLocalLength () !=
5417  getRowMap ()->getNodeNumElements (), std::runtime_error,
5418  "NO_TRANS case: Y has the wrong number of local rows. "
5419  "Y.getLocalLength() = " << Y.getLocalLength () << " != "
5420  "getRowMap()->getNodeNumElements() = " <<
5421  getRowMap ()->getNodeNumElements () << ".");
5422  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5423  (transpose && X.getLocalLength () !=
5424  getRowMap ()->getNodeNumElements (), std::runtime_error,
5425  "TRANS or CONJ_TRANS case: X has the wrong number of local "
5426  "rows. X.getLocalLength() = " << X.getLocalLength ()
5427  << " != getRowMap()->getNodeNumElements() = "
5428  << getRowMap ()->getNodeNumElements () << ".");
5429  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5430  (transpose && Y.getLocalLength () !=
5431  getColMap ()->getNodeNumElements (), std::runtime_error,
5432  "TRANS or CONJ_TRANS case: X has the wrong number of local "
5433  "rows. Y.getLocalLength() = " << Y.getLocalLength ()
5434  << " != getColMap()->getNodeNumElements() = "
5435  << getColMap ()->getNodeNumElements () << ".");
5436  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5437  (! isFillComplete (), std::runtime_error, "The matrix is not "
5438  "fill complete. You must call fillComplete() (possibly with "
5439  "domain and range Map arguments) without an intervening "
5440  "resumeFill() call before you may call this method.");
5441  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5442  (! X.isConstantStride () || ! Y.isConstantStride (),
5443  std::runtime_error, "X and Y must be constant stride.");
5444  // If the two pointers are null, then they don't alias one
5445  // another, even though they are equal.
5446  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5447  (X_lcl.data () == Y_lcl.data () && X_lcl.data () != nullptr,
5448  std::runtime_error, "X and Y may not alias one another.");
5449  }
5450 
5451  LocalOrdinal nrows = getNodeNumRows();
5452  LocalOrdinal maxRowImbalance = 0;
5453  if(nrows != 0)
5454  maxRowImbalance = getNodeMaxNumRowEntries() - (getNodeNumEntries() / nrows);
5455  if(size_t(maxRowImbalance) >= Tpetra::Details::Behavior::rowImbalanceThreshold())
5456  lclMatrix_->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5457  else
5458  lclMatrix_->apply (X_lcl, Y_lcl, mode, alpha, beta);
5459  }
5460 
// apply: public entry point for the sparse matrix-multivector product.
// Dispatches on `mode`: Teuchos::NO_TRANS goes to applyNonTranspose(),
// every other mode goes to applyTranspose().
//
// Parameters (X and Y are declared on lines missing from this listing; see
// the note below):
//   X     - input multivector.
//   Y     - output multivector, updated in place.
//   mode  - transpose mode.
//   alpha - scaling of the product.
//   beta  - scaling of the previous contents of Y.
//
// Errors: std::runtime_error (through TEUCHOS_TEST_FOR_EXCEPTION) if the
// matrix is not fill complete.
//
// NOTE(review): the listing numbering skips 5463-5465 and 5470, where the
// class qualifier, the function name with the X / Y parameters, and
// presumably a using-declaration for ProfilingRegion would be expected --
// confirm against the original file.
5461  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5462  void
5466  Teuchos::ETransp mode,
5467  Scalar alpha,
5468  Scalar beta) const
5469  {
5471  const char fnName[] = "Tpetra::CrsMatrix::apply";
5472 
5473  TEUCHOS_TEST_FOR_EXCEPTION
5474  (! isFillComplete (), std::runtime_error,
5475  fnName << ": Cannot call apply() until fillComplete() "
5476  "has been called.");
5477 
5478  if (mode == Teuchos::NO_TRANS) {
5479  ProfilingRegion regionNonTranspose (fnName);
5480  this->applyNonTranspose (X, Y, alpha, beta);
5481  }
5482  else {
5483  ProfilingRegion regionTranspose ("Tpetra::CrsMatrix::apply (transpose)");
5484 
5485  //Thyra was implicitly assuming that Y gets set to zero / or is overwritten
5486  //when beta==0. This was not the case with transpose in a multithreaded
5487  //environment where a multiplication with subsequent atomic_adds is used
5488  //since 0 is effectively not special cased. Doing the explicit set to zero here
5489  //This catches cases where Y is nan or inf.
5490  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5491  if (beta == ZERO) {
5492  Y.putScalar (ZERO);
5493  }
5494  this->applyTranspose (X, Y, mode, alpha, beta);
5495  }
5496  }
5497 
// Convenience wrapper: forwards B, X, D, dampingFactor, direction and
// numSweeps to reorderedGaussSeidel(), passing Teuchos::null in the
// row-indices argument position.
//
// NOTE(review): the listing numbering skips 5500-5503, where the class
// qualifier and the function name with the B, X and D parameters would be
// expected. The name is presumably gaussSeidel -- confirm against the
// original file.
5498  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5499  void
5504  const Scalar& dampingFactor,
5505  const ESweepDirection direction,
5506  const int numSweeps) const
5507  {
5508  reorderedGaussSeidel (B, X, D, Teuchos::null, dampingFactor, direction, numSweeps);
5509  }
5510 
5511  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5512  void
5517  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
5518  const Scalar& dampingFactor,
5519  const ESweepDirection direction,
5520  const int numSweeps) const
5521  {
5522  using Teuchos::null;
5523  using Teuchos::RCP;
5524  using Teuchos::rcp;
5525  using Teuchos::rcp_const_cast;
5526  using Teuchos::rcpFromRef;
5527  typedef Scalar ST;
5528 
5529  TEUCHOS_TEST_FOR_EXCEPTION(
5530  isFillComplete() == false, std::runtime_error,
5531  "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until "
5532  "fillComplete() has been called.");
5533  TEUCHOS_TEST_FOR_EXCEPTION(
5534  numSweeps < 0,
5535  std::invalid_argument,
5536  "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be , "
5537  "nonnegative but you provided numSweeps = " << numSweeps << " < 0.");
5538 
5539  // Translate from global to local sweep direction.
5540  // While doing this, validate the input.
5541  ESweepDirection localDirection;
5542  if (direction == Forward) {
5543  localDirection = Forward;
5544  }
5545  else if (direction == Backward) {
5546  localDirection = Backward;
5547  }
5548  else if (direction == Symmetric) {
5549  // We'll control local sweep direction manually.
5550  localDirection = Forward;
5551  }
5552  else {
5553  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
5554  "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have "
5555  "any of its valid values: Forward, Backward, or Symmetric.");
5556  }
5557 
5558  if (numSweeps == 0) {
5559  return; // Nothing to do.
5560  }
5561 
5562  // We don't need the Export object because this method assumes
5563  // that the row, domain, and range Maps are the same. We do need
5564  // the Import object, if there is one, though.
5565  RCP<const import_type> importer = this->getGraph()->getImporter();
5566  RCP<const export_type> exporter = this->getGraph()->getExporter();
5567  TEUCHOS_TEST_FOR_EXCEPTION(
5568  ! exporter.is_null (), std::runtime_error,
5569  "Tpetra's gaussSeidel implementation requires that the row, domain, "
5570  "and range Maps be the same. This cannot be the case, because the "
5571  "matrix has a nontrivial Export object.");
5572 
5573  RCP<const map_type> domainMap = this->getDomainMap ();
5574  RCP<const map_type> rangeMap = this->getRangeMap ();
5575  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
5576  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
5577 
5578 #ifdef HAVE_TEUCHOS_DEBUG
5579  {
5580  // The relation 'isSameAs' is transitive. It's also a
5581  // collective, so we don't have to do a "shared" test for
5582  // exception (i.e., a global reduction on the test value).
5583  TEUCHOS_TEST_FOR_EXCEPTION(
5584  ! X.getMap ()->isSameAs (*domainMap),
5585  std::runtime_error,
5586  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5587  "multivector X be in the domain Map of the matrix.");
5588  TEUCHOS_TEST_FOR_EXCEPTION(
5589  ! B.getMap ()->isSameAs (*rangeMap),
5590  std::runtime_error,
5591  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5592  "B be in the range Map of the matrix.");
5593  TEUCHOS_TEST_FOR_EXCEPTION(
5594  ! D.getMap ()->isSameAs (*rowMap),
5595  std::runtime_error,
5596  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
5597  "D be in the row Map of the matrix.");
5598  TEUCHOS_TEST_FOR_EXCEPTION(
5599  ! rowMap->isSameAs (*rangeMap),
5600  std::runtime_error,
5601  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
5602  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
5603  TEUCHOS_TEST_FOR_EXCEPTION(
5604  ! domainMap->isSameAs (*rangeMap),
5605  std::runtime_error,
5606  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
5607  "the range Map of the matrix be the same.");
5608  }
5609 #else
5610  // Forestall any compiler warnings for unused variables.
5611  (void) rangeMap;
5612  (void) rowMap;
5613 #endif // HAVE_TEUCHOS_DEBUG
5614 
5615  // If B is not constant stride, copy it into a constant stride
5616  // multivector. We'll handle the right-hand side B first and deal
5617  // with X right before the sweeps, to improve locality of the
5618  // first sweep. (If the problem is small enough, then that will
5619  // hopefully keep more of the entries of X in cache. This
5620  // optimizes for the typical case of a small number of sweeps.)
5621  RCP<const MV> B_in;
5622  if (B.isConstantStride()) {
5623  B_in = rcpFromRef (B);
5624  }
5625  else {
5626  // The range Map and row Map are the same in this case, so we
5627  // can use the (possibly cached) row Map multivector to store a
5628  // constant stride copy of B. We don't have to copy back, since
5629  // Gauss-Seidel won't modify B.
5630  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
5631  deep_copy (*B_in_nonconst, B); // Copy from B into B_in(_nonconst).
5632  B_in = rcp_const_cast<const MV> (B_in_nonconst);
5633 
5635  ! B.isConstantStride (),
5636  std::runtime_error,
5637  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
5638  "requires that X and B both have constant stride. Since B does not "
5639  "have constant stride, we had to make a copy. This is a limitation of "
5640  "the current implementation and not your fault, but we still report it "
5641  "as an efficiency warning for your information.");
5642  }
5643 
5644  // If X is not constant stride, copy it into a constant stride
5645  // multivector. Also, make the column Map multivector X_colMap,
5646  // and its domain Map view X_domainMap. (X actually must be a
5647  // domain Map view of a column Map multivector; exploit this, if X
5648  // has constant stride.)
5649 
5650  RCP<MV> X_domainMap;
5651  RCP<MV> X_colMap;
5652  bool copiedInput = false;
5653 
5654  if (importer.is_null ()) { // Domain and column Maps are the same.
5655  if (X.isConstantStride ()) {
5656  X_domainMap = rcpFromRef (X);
5657  X_colMap = X_domainMap;
5658  copiedInput = false;
5659  }
5660  else {
5661  // Get a temporary column Map multivector, make a domain Map
5662  // view of it, and copy X into the domain Map view. We have
5663  // to copy here because we won't be doing Import operations.
5664  X_colMap = getColumnMapMultiVector (X, true);
5665  X_domainMap = X_colMap; // Domain and column Maps are the same.
5666  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
5667  copiedInput = true;
5669  ! X.isConstantStride (), std::runtime_error,
5670  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
5671  "Gauss-Seidel kernel requires that X and B both have constant "
5672  "stride. Since X does not have constant stride, we had to make a "
5673  "copy. This is a limitation of the current implementation and not "
5674  "your fault, but we still report it as an efficiency warning for "
5675  "your information.");
5676  }
5677  }
5678  else { // We will be doing Import operations in the sweeps.
5679  if (X.isConstantStride ()) {
5680  X_domainMap = rcpFromRef (X);
5681  // This kernel assumes that X is a domain Map view of a column
5682  // Map multivector. We will only check if this is valid if
5683  // the CMake configure Teuchos_ENABLE_DEBUG is ON.
5684  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
5685 
5686  // FIXME (mfh 19 Mar 2013) Do we need to fill the remote
5687  // entries of X_colMap with zeros? Do we need to fill all of
5688  // X_domainMap initially with zeros? Ifpack
5689  // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely
5690  // new MultiVector each time.
5691 
5692  // Do the first Import for the first sweep. This simplifies
5693  // the logic in the sweeps.
5694  X_colMap->doImport (X, *importer, INSERT);
5695  copiedInput = false;
5696  }
5697  else {
5698  // Get a temporary column Map multivector X_colMap, and make a
5699  // domain Map view X_domainMap of it. Instead of copying, we
5700  // do an Import from X into X_domainMap. This saves us a
5701  // copy, since the Import has to copy the data anyway.
5702  X_colMap = getColumnMapMultiVector (X, true);
5703  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
5704  X_colMap->doImport (X, *importer, INSERT);
5705  copiedInput = true;
5707  ! X.isConstantStride (), std::runtime_error,
5708  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
5709  "Gauss-Seidel kernel requires that X and B both have constant stride. "
5710  "Since X does not have constant stride, we had to make a copy. "
5711  "This is a limitation of the current implementation and not your fault, "
5712  "but we still report it as an efficiency warning for your information.");
5713  }
5714  }
5715 
5716  for (int sweep = 0; sweep < numSweeps; ++sweep) {
5717  if (! importer.is_null () && sweep > 0) {
5718  // We already did the first Import for the zeroth sweep.
5719  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5720  }
5721 
5722  // Do local Gauss-Seidel.
5723  if (direction != Symmetric) {
5724  if (rowIndices.is_null ()) {
5725  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5726  dampingFactor,
5727  localDirection);
5728  }
5729  else {
5730  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5731  D, rowIndices,
5732  dampingFactor,
5733  localDirection);
5734  }
5735  }
5736  else { // direction == Symmetric
5737  const bool doImportBetweenDirections = false;
5738  if (rowIndices.is_null ()) {
5739  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5740  dampingFactor,
5741  Forward);
5742  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
5743  // Gauss-Seidel" does _not_ do an Import between the forward
5744  // and backward sweeps. This makes sense, because Aztec
5745  // considers "symmetric Gauss-Seidel" a subdomain solver.
5746  if (doImportBetweenDirections) {
5747  // Communicate again before the Backward sweep.
5748  if (! importer.is_null ()) {
5749  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5750  }
5751  }
5752  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5753  dampingFactor,
5754  Backward);
5755  }
5756  else {
5757  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5758  D, rowIndices,
5759  dampingFactor,
5760  Forward);
5761  if (doImportBetweenDirections) {
5762  // Communicate again before the Backward sweep.
5763  if (! importer.is_null ()) {
5764  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5765  }
5766  }
5767  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5768  D, rowIndices,
5769  dampingFactor,
5770  Backward);
5771  }
5772  }
5773  }
5774 
5775  if (copiedInput) {
5776  deep_copy (X, *X_domainMap); // Copy back from X_domainMap to X.
5777  }
5778  }
5779 
// Forwarding wrapper around reorderedGaussSeidelCopy for the case where no
// row reordering is wanted: it passes X, B, D, dampingFactor, direction,
// numSweeps and zeroInitialGuess through unchanged and supplies Teuchos::null
// in the rowIndices position.
//
// NOTE(review): this listing omits the lines numbered 5782-5785 (the
// class-qualified function name and the declarations of X, B and D), so the
// exact name and the types of those three parameters are not visible here.
// The name is presumably gaussSeidelCopy -- confirm against the real header.
5780  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5781  void
5786  const Scalar& dampingFactor,
5787  const ESweepDirection direction,
5788  const int numSweeps,
5789  const bool zeroInitialGuess) const
5790  {
      // Teuchos::null here means "no rowIndices".
5791  reorderedGaussSeidelCopy (X, B, D, Teuchos::null, dampingFactor, direction,
5792  numSweeps, zeroInitialGuess);
5793  }
5794 
5795  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
5796  void
5801  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
5802  const Scalar& dampingFactor,
5803  const ESweepDirection direction,
5804  const int numSweeps,
5805  const bool zeroInitialGuess) const
5806  {
5807  using Teuchos::null;
5808  using Teuchos::RCP;
5809  using Teuchos::rcp;
5810  using Teuchos::rcpFromRef;
5811  using Teuchos::rcp_const_cast;
5812  typedef Scalar ST;
5813  const char prefix[] = "Tpetra::CrsMatrix::(reordered)gaussSeidelCopy: ";
5814  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5815 
5816  TEUCHOS_TEST_FOR_EXCEPTION(
5817  ! isFillComplete (), std::runtime_error,
5818  prefix << "The matrix is not fill complete.");
5819  TEUCHOS_TEST_FOR_EXCEPTION(
5820  numSweeps < 0, std::invalid_argument,
5821  prefix << "The number of sweeps must be nonnegative, "
5822  "but you provided numSweeps = " << numSweeps << " < 0.");
5823 
5824  // Translate from global to local sweep direction.
5825  // While doing this, validate the input.
5826  ESweepDirection localDirection;
5827  if (direction == Forward) {
5828  localDirection = Forward;
5829  }
5830  else if (direction == Backward) {
5831  localDirection = Backward;
5832  }
5833  else if (direction == Symmetric) {
5834  // We'll control local sweep direction manually.
5835  localDirection = Forward;
5836  }
5837  else {
5838  TEUCHOS_TEST_FOR_EXCEPTION(
5839  true, std::invalid_argument,
5840  prefix << "The 'direction' enum does not have any of its valid "
5841  "values: Forward, Backward, or Symmetric.");
5842  }
5843 
5844  if (numSweeps == 0) {
5845  return;
5846  }
5847 
5848  RCP<const import_type> importer = this->getGraph ()->getImporter ();
5849  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
5850  TEUCHOS_TEST_FOR_EXCEPTION(
5851  ! exporter.is_null (), std::runtime_error,
5852  "This method's implementation currently requires that the matrix's row, "
5853  "domain, and range Maps be the same. This cannot be the case, because "
5854  "the matrix has a nontrivial Export object.");
5855 
5856  RCP<const map_type> domainMap = this->getDomainMap ();
5857  RCP<const map_type> rangeMap = this->getRangeMap ();
5858  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
5859  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
5860 
5861 #ifdef HAVE_TEUCHOS_DEBUG
5862  {
5863  // The relation 'isSameAs' is transitive. It's also a
5864  // collective, so we don't have to do a "shared" test for
5865  // exception (i.e., a global reduction on the test value).
5866  TEUCHOS_TEST_FOR_EXCEPTION(
5867  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
5868  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5869  "multivector X be in the domain Map of the matrix.");
5870  TEUCHOS_TEST_FOR_EXCEPTION(
5871  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
5872  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5873  "B be in the range Map of the matrix.");
5874  TEUCHOS_TEST_FOR_EXCEPTION(
5875  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
5876  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5877  "D be in the row Map of the matrix.");
5878  TEUCHOS_TEST_FOR_EXCEPTION(
5879  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
5880  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
5881  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
5882  TEUCHOS_TEST_FOR_EXCEPTION(
5883  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
5884  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
5885  "the range Map of the matrix be the same.");
5886  }
5887 #else
5888  // Forestall any compiler warnings for unused variables.
5889  (void) rangeMap;
5890  (void) rowMap;
5891 #endif // HAVE_TEUCHOS_DEBUG
5892 
5893  // Fetch a (possibly cached) temporary column Map multivector
5894  // X_colMap, and a domain Map view X_domainMap of it. Both have
5895  // constant stride by construction. We know that the domain Map
5896  // must include the column Map, because our Gauss-Seidel kernel
5897  // requires that the row Map, domain Map, and range Map are all
5898  // the same, and that each process owns all of its own diagonal
5899  // entries of the matrix.
5900 
5901  RCP<MV> X_colMap;
5902  RCP<MV> X_domainMap;
5903  bool copyBackOutput = false;
5904  if (importer.is_null ()) {
5905  if (X.isConstantStride ()) {
5906  X_colMap = rcpFromRef (X);
5907  X_domainMap = rcpFromRef (X);
5908  // Column Map and domain Map are the same, so there are no
5909  // remote entries. Thus, if we are not setting the initial
5910  // guess to zero, we don't have to worry about setting remote
5911  // entries to zero, even though we are not doing an Import in
5912  // this case.
5913  if (zeroInitialGuess) {
5914  X_colMap->putScalar (ZERO);
5915  }
5916  // No need to copy back to X at end.
5917  }
5918  else { // We must copy X into a constant stride multivector.
5919  // Just use the cached column Map multivector for that.
5920  // force=true means fill with zeros, so no need to fill
5921  // remote entries (not in domain Map) with zeros.
5922  X_colMap = getColumnMapMultiVector (X, true);
5923  // X_domainMap is always a domain Map view of the column Map
5924  // multivector. In this case, the domain and column Maps are
5925  // the same, so X_domainMap _is_ X_colMap.
5926  X_domainMap = X_colMap;
5927  if (! zeroInitialGuess) { // Don't copy if zero initial guess
5928  try {
5929  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
5930  } catch (std::exception& e) {
5931  std::ostringstream os;
5932  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
5933  "deep_copy(*X_domainMap, X) threw an exception: "
5934  << e.what () << ".";
5935  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
5936  }
5937  }
5938  copyBackOutput = true; // Don't forget to copy back at end.
5940  ! X.isConstantStride (),
5941  std::runtime_error,
5942  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
5943  "kernel requires that X and B both have constant stride. Since X "
5944  "does not have constant stride, we had to make a copy. This is a "
5945  "limitation of the current implementation and not your fault, but we "
5946  "still report it as an efficiency warning for your information.");
5947  }
5948  }
5949  else { // Column Map and domain Map are _not_ the same.
5950  X_colMap = getColumnMapMultiVector (X);
5951  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
5952 
5953 #ifdef HAVE_TPETRA_DEBUG
5954  auto X_colMap_host_view = X_colMap->getLocalViewHost ();
5955  auto X_domainMap_host_view = X_domainMap->getLocalViewHost ();
5956 
5957  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
5958  TEUCHOS_TEST_FOR_EXCEPTION
5959  (X_colMap_host_view.data () != X_domainMap_host_view.data (),
5960  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: Pointer to "
5961  "start of column Map view of X is not equal to pointer to start of "
5962  "(domain Map view of) X. This may mean that Tpetra::MultiVector::"
5963  "offsetViewNonConst is broken. "
5964  "Please report this bug to the Tpetra developers.");
5965  }
5966 
5967  TEUCHOS_TEST_FOR_EXCEPTION(
5968  X_colMap_host_view.extent (0) < X_domainMap_host_view.extent (0) ||
5969  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
5970  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
5971  "X_colMap has fewer local rows than X_domainMap. "
5972  "X_colMap_host_view.extent(0) = " << X_colMap_host_view.extent (0)
5973  << ", X_domainMap_host_view.extent(0) = "
5974  << X_domainMap_host_view.extent (0)
5975  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
5976  << ", and X_domainMap->getLocalLength() = "
5977  << X_domainMap->getLocalLength ()
5978  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
5979  "is broken. Please report this bug to the Tpetra developers.");
5980 
5981  TEUCHOS_TEST_FOR_EXCEPTION(
5982  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
5983  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
5984  "X_colMap has a different number of columns than X_domainMap. "
5985  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
5986  << " != X_domainMap->getNumVectors() = "
5987  << X_domainMap->getNumVectors ()
5988  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
5989  "is broken. Please report this bug to the Tpetra developers.");
5990 #endif // HAVE_TPETRA_DEBUG
5991 
5992  if (zeroInitialGuess) {
5993  // No need for an Import, since we're filling with zeros.
5994  X_colMap->putScalar (ZERO);
5995  } else {
5996  // We could just copy X into X_domainMap. However, that
5997  // wastes a copy, because the Import also does a copy (plus
5998  // communication). Since the typical use case for
5999  // Gauss-Seidel is a small number of sweeps (2 is typical), we
6000  // don't want to waste that copy. Thus, we do the Import
6001  // here, and skip the first Import in the first sweep.
6002  // Importing directly from X effects the copy into X_domainMap
6003  // (which is a view of X_colMap).
6004  X_colMap->doImport (X, *importer, INSERT);
6005  }
6006  copyBackOutput = true; // Don't forget to copy back at end.
6007  } // if column and domain Maps are (not) the same
6008 
6009  // The Gauss-Seidel / SOR kernel expects multivectors of constant
6010  // stride. X_colMap is by construction, but B might not be. If
6011  // it's not, we have to make a copy.
6012  RCP<const MV> B_in;
6013  if (B.isConstantStride ()) {
6014  B_in = rcpFromRef (B);
6015  }
6016  else {
6017  // Range Map and row Map are the same in this case, so we can
6018  // use the cached row Map multivector to store a constant stride
6019  // copy of B.
6020  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
6021  try {
6022  deep_copy (*B_in_nonconst, B);
6023  } catch (std::exception& e) {
6024  std::ostringstream os;
6025  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
6026  "deep_copy(*B_in_nonconst, B) threw an exception: "
6027  << e.what () << ".";
6028  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
6029  }
6030  B_in = rcp_const_cast<const MV> (B_in_nonconst);
6031 
6033  ! B.isConstantStride (),
6034  std::runtime_error,
6035  "gaussSeidelCopy: The current implementation requires that B have "
6036  "constant stride. Since B does not have constant stride, we had to "
6037  "copy it into a separate constant-stride multivector. This is a "
6038  "limitation of the current implementation and not your fault, but we "
6039  "still report it as an efficiency warning for your information.");
6040  }
6041 
6042  for (int sweep = 0; sweep < numSweeps; ++sweep) {
6043  if (! importer.is_null () && sweep > 0) {
6044  // We already did the first Import for the zeroth sweep above,
6045  // if it was necessary.
6046  X_colMap->doImport (*X_domainMap, *importer, INSERT);
6047  }
6048 
6049  // Do local Gauss-Seidel.
6050  if (direction != Symmetric) {
6051  if (rowIndices.is_null ()) {
6052  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
6053  dampingFactor,
6054  localDirection);
6055  }
6056  else {
6057  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
6058  D, rowIndices,
6059  dampingFactor,
6060  localDirection);
6061  }
6062  }
6063  else { // direction == Symmetric
6064  if (rowIndices.is_null ()) {
6065  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
6066  dampingFactor,
6067  Forward);
6068  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
6069  // Gauss-Seidel" does _not_ do an Import between the forward
6070  // and backward sweeps. This makes symmetric Gauss-Seidel a
6071  // symmetric preconditioner if the matrix A is symmetric. We
6072  // imitate Aztec's behavior here.
6073  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
6074  dampingFactor,
6075  Backward);
6076  }
6077  else {
6078  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
6079  D, rowIndices,
6080  dampingFactor,
6081  Forward);
6082  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
6083  D, rowIndices,
6084  dampingFactor,
6085  Backward);
6086 
6087  }
6088  }
6089  }
6090 
6091  if (copyBackOutput) {
6092  try {
6093  deep_copy (X , *X_domainMap); // Copy result back into X.
6094  } catch (std::exception& e) {
6095  TEUCHOS_TEST_FOR_EXCEPTION(
6096  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
6097  "threw an exception: " << e.what ());
6098  }
6099  }
6100  }
6101 
// Build a new CrsMatrix whose scalar type is T from this matrix.
//
// The new matrix is constructed from this->getCrsGraph(), its values are
// filled from this matrix's local values via copyConvert, and it is then
// fill-completed with this matrix's domain and range Maps.
//
// Returns: RCP to the newly created matrix.
// Throws (through TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC):
//   std::runtime_error - this matrix is not fill complete.
//   std::logic_error   - this matrix is fill complete but reports a
//                        non-static graph.
//
// NOTE(review): this listing omits the lines numbered 6105 (presumably the
// class qualifier) and 6127 (presumably what brings copyConvert into scope);
// confirm against the real source.
6102  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6103  template<class T>
6104  Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
6106  convert () const
6107  {
6108  using Teuchos::RCP;
6109  typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> output_matrix_type;
      // Consumed by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC macro below
      // (presumably as the message prefix).
6110  const char tfecfFuncName[] = "convert: ";
6111 
6112  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6113  (! this->isFillComplete (), std::runtime_error, "This matrix (the source "
6114  "of the conversion) is not fill complete. You must first call "
6115  "fillComplete() (possibly with the domain and range Map) without an "
6116  "intervening call to resumeFill(), before you may call this method.");
6117  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6118  (! this->isStaticGraph (), std::logic_error, "This matrix (the source "
6119  "of the conversion) claims to be fill complete, but does not have a "
6120  "static (i.e., constant) graph. Please report this bug to the Tpetra "
6121  "developers.");
6122 
6123  RCP<output_matrix_type> newMatrix
6124  (new output_matrix_type (this->getCrsGraph ()));
6125  // Copy old values into new values. impl_scalar_type and T may
6126  // differ, so we can't use Kokkos::deep_copy.
6128  copyConvert (newMatrix->lclMatrix_->getLocalMatrix ().values,
6129  this->lclMatrix_->getLocalMatrix ().values);
6130  // Since newmat has a static (const) graph, the graph already has
6131  // a column Map, and Import and Export objects already exist (if
6132  // applicable). Thus, calling fillComplete is cheap.
6133  newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
6134 
6135  return newMatrix;
6136  }
6137 
6138 
// Debug-only consistency check of the matrix's graph and value storage.
//
// Does nothing unless the debug flag queried below is true.  When it is,
// each violated invariant raises std::logic_error through
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC:
//   - staticGraph_ must be non-null;
//   - if myGraph_ is non-null it must equal staticGraph_;
//   - a fill-complete matrix must have a fill-complete graph;
//   - if the graph has allocated indices, a positive allocation size and at
//     least one local row, k_values1D_ must not be empty.
//
// NOTE(review): this listing omits the lines numbered 6141-6142 (presumably
// the class-qualified name and parameter list); the name is taken from
// tfecfFuncName below.
6139  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6140  void
6143  {
      // NOTE(review): the key passed here is "CrsGraph" although this is
      // CrsMatrix code -- confirm that this is intentional and not copy/paste.
6144  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6145  if (debug) {
6146  const char tfecfFuncName[] = "checkInternalState: ";
6147  const char err[] = "Internal state is not consistent. "
6148  "Please report this bug to the Tpetra developers.";
6149 
6150  // This version of the graph (RCP<const crs_graph_type>) must
6151  // always be nonnull.
6152  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6153  (staticGraph_.is_null (), std::logic_error, err);
6154  // myGraph == null means that the matrix has a const ("static")
6155  // graph. Otherwise, the matrix has a dynamic graph (it owns its
6156  // graph).
6157  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6158  (! myGraph_.is_null () && myGraph_ != staticGraph_,
6159  std::logic_error, err);
6160  // if matrix is fill complete, then graph must be fill complete
6161  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6162  (isFillComplete () && ! staticGraph_->isFillComplete (),
6163  std::logic_error, err << " Specifically, the matrix is fill complete, "
6164  "but its graph is NOT fill complete.");
6165  // if values are allocated and they are non-zero in number, then
6166  // one of the allocations should be present
6167  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6168  (staticGraph_->indicesAreAllocated () &&
6169  staticGraph_->getNodeAllocationSize() > 0 &&
6170  staticGraph_->getNodeNumRows() > 0 &&
6171  k_values1D_.extent (0) == 0,
6172  std::logic_error, err);
6173  }
6174  }
6175 
// Return a one-line, brace-delimited summary of the matrix.
//
// The string always contains the class tag, the object label (only when it is
// non-empty), the fill-complete status and the global dimensions.  The global
// number of entries is appended only when the matrix is fill complete.
//
// NOTE(review): this listing omits the lines numbered 6178-6179 (presumably
// the class-qualified name and parameter list); the name "description" is
// inferred from context -- confirm against the real source.
6176  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6177  std::string
6180  {
6181  std::ostringstream os;
6182 
6183  os << "Tpetra::CrsMatrix (Kokkos refactor): {";
6184  if (this->getObjectLabel () != "") {
6185  os << "Label: \"" << this->getObjectLabel () << "\", ";
6186  }
6187  if (isFillComplete ()) {
6188  os << "isFillComplete: true"
6189  << ", global dimensions: [" << getGlobalNumRows () << ", "
6190  << getGlobalNumCols () << "]"
6191  << ", global number of entries: " << getGlobalNumEntries ()
6192  << "}";
6193  }
6194  else {
      // The entry count is not printed when the matrix is not fill complete.
6195  os << "isFillComplete: false"
6196  << ", global dimensions: [" << getGlobalNumRows () << ", "
6197  << getGlobalNumCols () << "]}";
6198  }
6199  return os.str ();
6200  }
6201 
// Print a description of the matrix to 'out' at the requested verbosity.
//
// Parameters:
//   out       - destination stream; output is indented with Teuchos::OSTab.
//   verbLevel - VERB_DEFAULT is treated as VERB_LOW; VERB_NONE prints nothing.
//
// What is printed, cumulatively:
//   low     - on rank 0 only: class tag, label (if non-empty), template
//             parameter names, fill-complete status and global dimensions
//             (plus global entry counts when fill complete).
//   medium  - the row, column, domain and range Maps (or "null" / "same as
//             ..." when the RCP handles compare equal), then one section per
//             process with allocation size, entry count and max entries/row.
//   high    - one table row per local row: rank, global row id, entry count.
//   extreme - additionally the (global column index, value) pairs of each row.
//
// The per-process loops call comm->barrier() on every rank, outside the
// "myRank == curRank" test, so at VERB_MEDIUM and above every process in the
// communicator has to make this call.
//
// NOTE(review): this listing omits the line numbered 6204 (presumably the
// class qualifier); confirm against the real source.
6202  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6203  void
6205  describe (Teuchos::FancyOStream &out,
6206  const Teuchos::EVerbosityLevel verbLevel) const
6207  {
6208  using std::endl;
6209  using std::setw;
6210  using Teuchos::ArrayView;
6211  using Teuchos::Comm;
6212  using Teuchos::RCP;
6213  using Teuchos::TypeNameTraits;
6214  using Teuchos::VERB_DEFAULT;
6215  using Teuchos::VERB_NONE;
6216  using Teuchos::VERB_LOW;
6217  using Teuchos::VERB_MEDIUM;
6218  using Teuchos::VERB_HIGH;
6219  using Teuchos::VERB_EXTREME;
6220 
6221  const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
6222 
6223  if (vl == VERB_NONE) {
6224  return; // Don't print anything at all
6225  }
6226 
6227  // By convention, describe() always begins with a tab.
6228  Teuchos::OSTab tab0 (out);
6229 
6230  RCP<const Comm<int> > comm = this->getComm();
6231  const int myRank = comm->getRank();
6232  const int numProcs = comm->getSize();
      // Column width for the per-row table: the loop counts one step per
      // power of ten below getGlobalNumRows(); the result is clamped to at
      // least 11 and then padded by 2.
6233  size_t width = 1;
6234  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
6235  ++width;
6236  }
6237  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
6238 
6239  // none: print nothing
6240  // low: print O(1) info from node 0
6241  // medium: print O(P) info, num entries per process
6242  // high: print O(N) info, num entries per row
6243  // extreme: print O(NNZ) info: print indices and values
6244  //
6245  // for medium and higher, print constituent objects at specified verbLevel
6246  if (myRank == 0) {
6247  out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
6248  }
6249  Teuchos::OSTab tab1 (out);
6250 
6251  if (myRank == 0) {
6252  if (this->getObjectLabel () != "") {
6253  out << "Label: \"" << this->getObjectLabel () << "\", ";
6254  }
6255  {
6256  out << "Template parameters:" << endl;
6257  Teuchos::OSTab tab2 (out);
6258  out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
6259  << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
6260  << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
6261  << "Node: " << TypeNameTraits<Node>::name () << endl;
6262  }
6263  if (isFillComplete()) {
6264  out << "isFillComplete: true" << endl
6265  << "Global dimensions: [" << getGlobalNumRows () << ", "
6266  << getGlobalNumCols () << "]" << endl
6267  << "Global number of entries: " << getGlobalNumEntries () << endl
6268  << endl << "Global max number of entries in a row: "
6269  << getGlobalMaxNumRowEntries () << endl;
6270  }
6271  else {
6272  out << "isFillComplete: false" << endl
6273  << "Global dimensions: [" << getGlobalNumRows () << ", "
6274  << getGlobalNumCols () << "]" << endl;
6275  }
6276  }
6277 
6278  if (vl < VERB_MEDIUM) {
6279  return; // all done!
6280  }
6281 
6282  // Describe the row Map.
6283  if (myRank == 0) {
6284  out << endl << "Row Map:" << endl;
6285  }
6286  if (getRowMap ().is_null ()) {
6287  if (myRank == 0) {
6288  out << "null" << endl;
6289  }
6290  }
6291  else {
6292  if (myRank == 0) {
6293  out << endl;
6294  }
6295  getRowMap ()->describe (out, vl);
6296  }
6297 
6298  // Describe the column Map.
6299  if (myRank == 0) {
6300  out << "Column Map: ";
6301  }
6302  if (getColMap ().is_null ()) {
6303  if (myRank == 0) {
6304  out << "null" << endl;
6305  }
6306  } else if (getColMap () == getRowMap ()) {
6307  if (myRank == 0) {
6308  out << "same as row Map" << endl;
6309  }
6310  } else {
6311  if (myRank == 0) {
6312  out << endl;
6313  }
6314  getColMap ()->describe (out, vl);
6315  }
6316 
6317  // Describe the domain Map.
6318  if (myRank == 0) {
6319  out << "Domain Map: ";
6320  }
6321  if (getDomainMap ().is_null ()) {
6322  if (myRank == 0) {
6323  out << "null" << endl;
6324  }
6325  } else if (getDomainMap () == getRowMap ()) {
6326  if (myRank == 0) {
6327  out << "same as row Map" << endl;
6328  }
6329  } else if (getDomainMap () == getColMap ()) {
6330  if (myRank == 0) {
6331  out << "same as column Map" << endl;
6332  }
6333  } else {
6334  if (myRank == 0) {
6335  out << endl;
6336  }
6337  getDomainMap ()->describe (out, vl);
6338  }
6339 
6340  // Describe the range Map.
6341  if (myRank == 0) {
6342  out << "Range Map: ";
6343  }
6344  if (getRangeMap ().is_null ()) {
6345  if (myRank == 0) {
6346  out << "null" << endl;
6347  }
6348  } else if (getRangeMap () == getDomainMap ()) {
6349  if (myRank == 0) {
6350  out << "same as domain Map" << endl;
6351  }
6352  } else if (getRangeMap () == getRowMap ()) {
6353  if (myRank == 0) {
6354  out << "same as row Map" << endl;
6355  }
6356  } else {
6357  if (myRank == 0) {
6358  out << endl;
6359  }
6360  getRangeMap ()->describe (out, vl);
6361  }
6362 
6363  // O(P) data
      // Ranks take turns printing; the barriers sit outside the rank test so
      // that every process executes them on every iteration.
6364  for (int curRank = 0; curRank < numProcs; ++curRank) {
6365  if (myRank == curRank) {
6366  out << "Process rank: " << curRank << endl;
6367  Teuchos::OSTab tab2 (out);
6368  if (! staticGraph_->indicesAreAllocated ()) {
6369  out << "Graph indices not allocated" << endl;
6370  }
6371  else {
6372  out << "Number of allocated entries: "
6373  << staticGraph_->getNodeAllocationSize () << endl;
6374  }
6375  out << "Number of entries: " << getNodeNumEntries () << endl
6376  << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
6377  << endl;
6378  }
6379  // Give output time to complete by executing some barriers.
6380  comm->barrier ();
6381  comm->barrier ();
6382  comm->barrier ();
6383  }
6384 
6385  if (vl < VERB_HIGH) {
6386  return; // all done!
6387  }
6388 
6389  // O(N) and O(NNZ) data
6390  for (int curRank = 0; curRank < numProcs; ++curRank) {
6391  if (myRank == curRank) {
6392  out << std::setw(width) << "Proc Rank"
6393  << std::setw(width) << "Global Row"
6394  << std::setw(width) << "Num Entries";
6395  if (vl == VERB_EXTREME) {
6396  out << std::setw(width) << "(Index,Value)";
6397  }
6398  out << endl;
6399  for (size_t r = 0; r < getNodeNumRows (); ++r) {
6400  const size_t nE = getNumEntriesInLocalRow(r);
6401  GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
6402  out << std::setw(width) << myRank
6403  << std::setw(width) << gid
6404  << std::setw(width) << nE;
6405  if (vl == VERB_EXTREME) {
6406  if (isGloballyIndexed()) {
6407  ArrayView<const GlobalOrdinal> rowinds;
6408  ArrayView<const Scalar> rowvals;
6409  getGlobalRowView (gid, rowinds, rowvals);
6410  for (size_t j = 0; j < nE; ++j) {
6411  out << " (" << rowinds[j]
6412  << ", " << rowvals[j]
6413  << ") ";
6414  }
6415  }
6416  else if (isLocallyIndexed()) {
6417  ArrayView<const LocalOrdinal> rowinds;
6418  ArrayView<const Scalar> rowvals;
6419  getLocalRowView (r, rowinds, rowvals);
      // Local column indices are translated to global ones for printing.
6420  for (size_t j=0; j < nE; ++j) {
6421  out << " (" << getColMap()->getGlobalElement(rowinds[j])
6422  << ", " << rowvals[j]
6423  << ") ";
6424  }
6425  } // globally or locally indexed
6426  } // vl == VERB_EXTREME
6427  out << endl;
6428  } // for each row r on this process
6429  } // if (myRank == curRank)
6430 
6431  // Give output time to complete
6432  comm->barrier ();
6433  comm->barrier ();
6434  comm->barrier ();
6435  } // for each process p
6436  }
6437 
6438  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6439  bool
6442  {
6443  // It's not clear what kind of compatibility checks on sizes can
6444  // be performed here. Epetra_CrsGraph doesn't check any sizes for
6445  // compatibility.
6446 
6447  // Currently, the source object must be a RowMatrix with the same
6448  // four template parameters as the target CrsMatrix. We might
6449  // relax this requirement later.
6451  const row_matrix_type* srcRowMat =
6452  dynamic_cast<const row_matrix_type*> (&source);
6453  return (srcRowMat != nullptr);
6454  }
6455 
6456  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6457  void
6460  const typename crs_graph_type::padding_type& padding,
6461  const bool verbose)
6462  {
6464  using Details::padCrsArrays;
6465  using std::endl;
6466  using LO = local_ordinal_type;
6467  using execution_space = typename device_type::execution_space;
6468  using row_ptrs_type =
6469  typename local_graph_type::row_map_type::non_const_type;
6470  using range_policy =
6471  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
6472  const char tfecfFuncName[] = "applyCrsPadding";
6473  const char suffix[] =
6474  ". Please report this bug to the Tpetra developers.";
6475  ProfilingRegion regionCAP("Tpetra::CrsMatrix::applyCrsPadding");
6476 
6477  std::unique_ptr<std::string> prefix;
6478  if (verbose) {
6479  prefix = this->createPrefix("CrsMatrix", tfecfFuncName);
6480  std::ostringstream os;
6481  os << *prefix << "padding: ";
6482  padding.print(os);
6483  os << endl;
6484  std::cerr << os.str();
6485  }
6486  const int myRank = ! verbose ? -1 : [&] () {
6487  auto map = this->getMap();
6488  if (map.is_null()) {
6489  return -1;
6490  }
6491  auto comm = map->getComm();
6492  if (comm.is_null()) {
6493  return -1;
6494  }
6495  return comm->getRank();
6496  } ();
6497 
6498  // NOTE (mfh 29 Jan 2020) This allocates the values array.
6499  if (! myGraph_->indicesAreAllocated()) {
6500  if (verbose) {
6501  std::ostringstream os;
6502  os << *prefix << "Call allocateIndices" << endl;
6503  std::cerr << os.str();
6504  }
6505  allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
6506  }
6507 
6508  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
6509  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
6510  // size needs to increase. That should be the job of
6511  // padCrsArrays.
6512 
6513  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
6514  // would use it directly.
6515 
6516  if (verbose) {
6517  std::ostringstream os;
6518  os << *prefix << "Allocate row_ptrs_beg: "
6519  << myGraph_->k_rowPtrs_.extent(0) << endl;
6520  std::cerr << os.str();
6521  }
6522  using Kokkos::view_alloc;
6523  using Kokkos::WithoutInitializing;
6524  row_ptrs_type row_ptr_beg(
6525  view_alloc("row_ptr_beg", WithoutInitializing),
6526  myGraph_->k_rowPtrs_.extent(0));
6527  Kokkos::deep_copy(row_ptr_beg, myGraph_->k_rowPtrs_);
6528 
6529  const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
6530  size_t(row_ptr_beg.extent(0) - 1);
6531  if (verbose) {
6532  std::ostringstream os;
6533  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
6534  std::cerr << os.str();
6535  }
6536  row_ptrs_type row_ptr_end(
6537  view_alloc("row_ptr_end", WithoutInitializing), N);
6538 
6539  const bool refill_num_row_entries =
6540  myGraph_->k_numRowEntries_.extent(0) != 0;
6541 
6542  if (refill_num_row_entries) { // unpacked storage
6543  // We can't assume correct *this capture until C++17, and it's
6544  // likely more efficient just to capture what we need anyway.
6545  auto num_row_entries = myGraph_->k_numRowEntries_;
6546  Kokkos::parallel_for
6547  ("Fill end row pointers", range_policy(0, N),
6548  KOKKOS_LAMBDA (const size_t i) {
6549  row_ptr_end(i) = row_ptr_beg(i) + num_row_entries(i);
6550  });
6551  }
6552  else {
6553  // FIXME (mfh 04 Feb 2020) Fix padCrsArrays so that if packed
6554  // storage, we don't need row_ptr_end to be separate allocation;
6555  // could just have it alias row_ptr_beg+1.
6556  Kokkos::parallel_for
6557  ("Fill end row pointers", range_policy(0, N),
6558  KOKKOS_LAMBDA (const size_t i) {
6559  row_ptr_end(i) = row_ptr_beg(i+1);
6560  });
6561  }
6562 
6563  if (myGraph_->isGloballyIndexed()) {
6564  padCrsArrays(row_ptr_beg, row_ptr_end, myGraph_->k_gblInds1D_,
6565  k_values1D_, padding, myRank, verbose);
6566  const auto newValuesLen = k_values1D_.extent(0);
6567  const auto newColIndsLen = myGraph_->k_gblInds1D_.extent(0);
6568  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6569  (newValuesLen != newColIndsLen, std::logic_error,
6570  ": After padding, k_values1D_.extent(0)=" << newValuesLen
6571  << " != myGraph_->k_gblInds1D_.extent(0)=" << newColIndsLen
6572  << suffix);
6573  }
6574  else {
6575  padCrsArrays(row_ptr_beg, row_ptr_end, myGraph_->k_lclInds1D_,
6576  k_values1D_, padding, myRank, verbose);
6577  const auto newValuesLen = k_values1D_.extent(0);
6578  const auto newColIndsLen = myGraph_->k_lclInds1D_.extent(0);
6579  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6580  (newValuesLen != newColIndsLen, std::logic_error,
6581  ": After padding, k_values1D_.extent(0)=" << newValuesLen
6582  << " != myGraph_->k_lclInds1D_.extent(0)=" << newColIndsLen
6583  << suffix);
6584  }
6585 
6586  if (refill_num_row_entries) {
6587  auto num_row_entries = myGraph_->k_numRowEntries_;
6588  Kokkos::parallel_for
6589  ("Fill num entries", range_policy(0, N),
6590  KOKKOS_LAMBDA (const size_t i) {
6591  num_row_entries(i) = row_ptr_end(i) - row_ptr_beg(i);
6592  });
6593  }
6594 
6595  if (verbose) {
6596  std::ostringstream os;
6597  os << *prefix << "Assign myGraph_->k_rowPtrs_; "
6598  << "old size: " << myGraph_->k_rowPtrs_.extent(0)
6599  << ", new size: " << row_ptr_beg.extent(0) << endl;
6600  std::cerr << os.str();
6601  TEUCHOS_ASSERT( myGraph_->k_rowPtrs_.extent(0) ==
6602  row_ptr_beg.extent(0) );
6603  }
6604  myGraph_->k_rowPtrs_ = row_ptr_beg;
6605  }
6606 
6607  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6608  void
6609  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6610  copyAndPermuteStaticGraph(
6611  const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
6612  const size_t numSameIDs,
6613  const LocalOrdinal permuteToLIDs[],
6614  const LocalOrdinal permuteFromLIDs[],
6615  const size_t numPermutes)
6616  {
6617  using Details::ProfilingRegion;
6618  using Teuchos::Array;
6619  using Teuchos::ArrayView;
6620  using std::endl;
6621  using LO = LocalOrdinal;
6622  using GO = GlobalOrdinal;
6623  const char tfecfFuncName[] = "copyAndPermuteStaticGraph";
6624  const char suffix[] =
6625  " Please report this bug to the Tpetra developers.";
6626  ProfilingRegion regionCAP
6627  ("Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
6628 
6629  const bool debug = Details::Behavior::debug("CrsGraph");
6630  const bool verbose = Details::Behavior::verbose("CrsGraph");
6631  std::unique_ptr<std::string> prefix;
6632  if (verbose) {
6633  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6634  std::ostringstream os;
6635  os << *prefix << "Start" << endl;
6636  }
6637  const char* const prefix_raw =
6638  verbose ? prefix.get()->c_str() : nullptr;
6639 
6640  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6641  //
6642  // Copy the first numSame row from source to target (this matrix).
6643  // This involves copying rows corresponding to LIDs [0, numSame-1].
6644  //
6645  const map_type& srcRowMap = * (srcMat.getRowMap ());
6646  Array<GO> rowInds;
6647  Array<Scalar> rowVals;
6648  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
6649  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6650  // Global ID for the current row index in the source matrix.
6651  // The first numSameIDs GIDs in the two input lists are the
6652  // same, so sourceGID == targetGID in this case.
6653  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6654  const GO targetGID = sourceGID;
6655 
6656  ArrayView<const GO> rowIndsConstView;
6657  ArrayView<const Scalar> rowValsConstView;
6658 
6659  if (sourceIsLocallyIndexed) {
6660  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6661  if (rowLength > static_cast<size_t> (rowInds.size())) {
6662  rowInds.resize (rowLength);
6663  rowVals.resize (rowLength);
6664  }
6665  // Resizing invalidates an Array's views, so we must make new
6666  // ones, even if rowLength hasn't changed.
6667  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6668  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6669 
6670  // The source matrix is locally indexed, so we have to get a
6671  // copy. Really it's the GIDs that have to be copied (because
6672  // they have to be converted from LIDs).
6673  size_t checkRowLength = 0;
6674  srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
6675  rowValsView, checkRowLength);
6676  if (debug) {
6677  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6678  (rowLength != checkRowLength, std::logic_error, "For "
6679  "global row index " << sourceGID << ", the source "
6680  "matrix's getNumEntriesInGlobalRow returns a row length "
6681  "of " << rowLength << ", but getGlobalRowCopy reports "
6682  "a row length of " << checkRowLength << "." << suffix);
6683  }
6684  rowIndsConstView = rowIndsView.view (0, rowLength);
6685  rowValsConstView = rowValsView.view (0, rowLength);
6686  }
6687  else { // source matrix is globally indexed.
6688  srcMat.getGlobalRowView(sourceGID, rowIndsConstView,
6689  rowValsConstView);
6690  }
6691 
6692  // Applying a permutation to a matrix with a static graph
6693  // means REPLACE-ing entries.
6694  combineGlobalValues(targetGID, rowIndsConstView,
6695  rowValsConstView, REPLACE,
6696  prefix_raw, debug, verbose);
6697  }
6698 
6699  if (verbose) {
6700  std::ostringstream os;
6701  os << *prefix << "Do permutes" << endl;
6702  }
6703 
6704  const map_type& tgtRowMap = * (this->getRowMap ());
6705  for (size_t p = 0; p < numPermutes; ++p) {
6706  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6707  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6708 
6709  ArrayView<const GO> rowIndsConstView;
6710  ArrayView<const Scalar> rowValsConstView;
6711 
6712  if (sourceIsLocallyIndexed) {
6713  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6714  if (rowLength > static_cast<size_t> (rowInds.size ())) {
6715  rowInds.resize (rowLength);
6716  rowVals.resize (rowLength);
6717  }
6718  // Resizing invalidates an Array's views, so we must make new
6719  // ones, even if rowLength hasn't changed.
6720  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6721  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6722 
6723  // The source matrix is locally indexed, so we have to get a
6724  // copy. Really it's the GIDs that have to be copied (because
6725  // they have to be converted from LIDs).
6726  size_t checkRowLength = 0;
6727  srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6728  rowValsView, checkRowLength);
6729  if (debug) {
6730  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6731  (rowLength != checkRowLength, std::logic_error, "For "
6732  "source matrix global row index " << sourceGID << ", "
6733  "getNumEntriesInGlobalRow returns a row length of " <<
6734  rowLength << ", but getGlobalRowCopy a row length of "
6735  << checkRowLength << "." << suffix);
6736  }
6737  rowIndsConstView = rowIndsView.view (0, rowLength);
6738  rowValsConstView = rowValsView.view (0, rowLength);
6739  }
6740  else {
6741  srcMat.getGlobalRowView(sourceGID, rowIndsConstView,
6742  rowValsConstView);
6743  }
6744 
6745  combineGlobalValues(targetGID, rowIndsConstView,
6746  rowValsConstView, REPLACE,
6747  prefix_raw, debug, verbose);
6748  }
6749 
6750  if (verbose) {
6751  std::ostringstream os;
6752  os << *prefix << "Done" << endl;
6753  }
6754  }
6755 
6756  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6757  void
6758  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6759  copyAndPermuteNonStaticGraph(
6760  const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
6761  const size_t numSameIDs,
6762  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
6763  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
6764  const size_t numPermutes)
6765  {
6766  using Details::ProfilingRegion;
6767  using Teuchos::Array;
6768  using Teuchos::ArrayView;
6769  using std::endl;
6770  using LO = LocalOrdinal;
6771  using GO = GlobalOrdinal;
6772  const char tfecfFuncName[] = "copyAndPermuteNonStaticGraph";
6773  const char suffix[] =
6774  " Please report this bug to the Tpetra developers.";
6775  ProfilingRegion regionCAP
6776  ("Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
6777 
6778  const bool debug = Details::Behavior::debug("CrsGraph");
6779  const bool verbose = Details::Behavior::verbose("CrsGraph");
6780  std::unique_ptr<std::string> prefix;
6781  if (verbose) {
6782  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6783  std::ostringstream os;
6784  os << *prefix << "Start" << endl;
6785  }
6786  const char* const prefix_raw =
6787  verbose ? prefix.get()->c_str() : nullptr;
6788 
6789  {
6790  using row_graph_type = RowGraph<LO, GO, Node>;
6791  const row_graph_type& srcGraph = *(srcMat.getGraph());
6792  auto padding =
6793  myGraph_->computeCrsPadding(srcGraph, numSameIDs,
6794  permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
6795  applyCrsPadding(*padding, verbose);
6796  }
6797  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
6798  //
6799  // Copy the first numSame row from source to target (this matrix).
6800  // This involves copying rows corresponding to LIDs [0, numSame-1].
6801  //
6802  const map_type& srcRowMap = * (srcMat.getRowMap ());
6803  Array<GO> rowInds;
6804  Array<Scalar> rowVals;
6805  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
6806  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
6807  // Global ID for the current row index in the source matrix.
6808  // The first numSameIDs GIDs in the two input lists are the
6809  // same, so sourceGID == targetGID in this case.
6810  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
6811  const GO targetGID = sourceGID;
6812 
6813  ArrayView<const GO> rowIndsConstView;
6814  ArrayView<const Scalar> rowValsConstView;
6815 
6816  if (sourceIsLocallyIndexed) {
6817  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6818  if (rowLength > static_cast<size_t> (rowInds.size())) {
6819  rowInds.resize (rowLength);
6820  rowVals.resize (rowLength);
6821  }
6822  // Resizing invalidates an Array's views, so we must make new
6823  // ones, even if rowLength hasn't changed.
6824  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6825  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6826 
6827  // The source matrix is locally indexed, so we have to get a
6828  // copy. Really it's the GIDs that have to be copied (because
6829  // they have to be converted from LIDs).
6830  size_t checkRowLength = 0;
6831  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
6832  checkRowLength);
6833  if (debug) {
6834  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6835  (rowLength != checkRowLength, std::logic_error, ": For "
6836  "global row index " << sourceGID << ", the source "
6837  "matrix's getNumEntriesInGlobalRow returns a row length "
6838  "of " << rowLength << ", but getGlobalRowCopy reports "
6839  "a row length of " << checkRowLength << "." << suffix);
6840  }
6841  rowIndsConstView = rowIndsView.view (0, rowLength);
6842  rowValsConstView = rowValsView.view (0, rowLength);
6843  }
6844  else { // source matrix is globally indexed.
6845  srcMat.getGlobalRowView(sourceGID, rowIndsConstView,
6846  rowValsConstView);
6847  }
6848 
6849  // Combine the data into the target matrix.
6850  insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6851  rowValsConstView, prefix_raw, debug, verbose);
6852  }
6853 
6854  if (verbose) {
6855  std::ostringstream os;
6856  os << *prefix << "Do permutes" << endl;
6857  }
6858  const LO* const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
6859  const LO* const permuteToLIDs = permuteToLIDs_dv.view_host().data();
6860 
6861  const map_type& tgtRowMap = * (this->getRowMap ());
6862  for (size_t p = 0; p < numPermutes; ++p) {
6863  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
6864  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
6865 
6866  ArrayView<const GO> rowIndsConstView;
6867  ArrayView<const Scalar> rowValsConstView;
6868 
6869  if (sourceIsLocallyIndexed) {
6870  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
6871  if (rowLength > static_cast<size_t> (rowInds.size ())) {
6872  rowInds.resize (rowLength);
6873  rowVals.resize (rowLength);
6874  }
6875  // Resizing invalidates an Array's views, so we must make new
6876  // ones, even if rowLength hasn't changed.
6877  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
6878  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
6879 
6880  // The source matrix is locally indexed, so we have to get a
6881  // copy. Really it's the GIDs that have to be copied (because
6882  // they have to be converted from LIDs).
6883  size_t checkRowLength = 0;
6884  srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
6885  rowValsView, checkRowLength);
6886  if (debug) {
6887  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6888  (rowLength != checkRowLength, std::logic_error, "For "
6889  "source matrix global row index " << sourceGID << ", "
6890  "getNumEntriesInGlobalRow returns a row length of " <<
6891  rowLength << ", but getGlobalRowCopy a row length of "
6892  << checkRowLength << "." << suffix);
6893  }
6894  rowIndsConstView = rowIndsView.view (0, rowLength);
6895  rowValsConstView = rowValsView.view (0, rowLength);
6896  }
6897  else {
6898  srcMat.getGlobalRowView(sourceGID, rowIndsConstView,
6899  rowValsConstView);
6900  }
6901 
6902  // Combine the data into the target matrix.
6903  insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
6904  rowValsConstView, prefix_raw, debug, verbose);
6905  }
6906 
6907  if (verbose) {
6908  std::ostringstream os;
6909  os << *prefix << "Done" << endl;
6910  }
6911  }
6912 
6913  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6914  void
6915  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6916  copyAndPermute(
6917  const SrcDistObject& srcObj,
6918  const size_t numSameIDs,
6919  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6920  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs)
6921  {
6922  using Details::Behavior;
6924  using Details::ProfilingRegion;
6925  using std::endl;
6926 
6927  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
6928  const char tfecfFuncName[] = "copyAndPermute: ";
6929  ProfilingRegion regionCAP("Tpetra::CrsMatrix::copyAndPermute");
6930 
6931  const bool verbose = Behavior::verbose("CrsMatrix");
6932  std::unique_ptr<std::string> prefix;
6933  if (verbose) {
6934  prefix = this->createPrefix("CrsMatrix", "copyAndPermute");
6935  std::ostringstream os;
6936  os << *prefix << endl
6937  << *prefix << " numSameIDs: " << numSameIDs << endl
6938  << *prefix << " numPermute: " << permuteToLIDs.extent(0)
6939  << endl
6940  << *prefix << " "
6941  << dualViewStatusToString (permuteToLIDs, "permuteToLIDs")
6942  << endl
6943  << *prefix << " "
6944  << dualViewStatusToString (permuteFromLIDs, "permuteFromLIDs")
6945  << endl
6946  << *prefix << " "
6947  << "isStaticGraph: " << (isStaticGraph() ? "true" : "false")
6948  << endl;
6949  std::cerr << os.str ();
6950  }
6951 
6952  const auto numPermute = permuteToLIDs.extent (0);
6953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6954  (numPermute != permuteFromLIDs.extent (0),
6955  std::invalid_argument, "permuteToLIDs.extent(0) = "
6956  << numPermute << "!= permuteFromLIDs.extent(0) = "
6957  << permuteFromLIDs.extent (0) << ".");
6958 
6959  // This dynamic cast should succeed, because we've already tested
6960  // it in checkSizes().
6961  using RMT = RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
6962  const RMT& srcMat = dynamic_cast<const RMT&> (srcObj);
6963  if (isStaticGraph ()) {
6964  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6965  auto permuteToLIDs_h = permuteToLIDs.view_host ();
6966  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6967  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6968 
6969  copyAndPermuteStaticGraph(srcMat, numSameIDs,
6970  permuteToLIDs_h.data(),
6971  permuteFromLIDs_h.data(),
6972  numPermute);
6973  }
6974  else {
6975  copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6976  permuteFromLIDs, numPermute);
6977  }
6978 
6979  if (verbose) {
6980  std::ostringstream os;
6981  os << *prefix << "Done" << endl;
6982  std::cerr << os.str();
6983  }
6984  }
6985 
6986  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
6987  void
6988  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6989  packAndPrepare
6990  (const SrcDistObject& source,
6991  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6992  Kokkos::DualView<char*, buffer_device_type>& exports,
6993  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6994  size_t& constantNumPackets,
6995  Distributor& distor)
6996  {
6997  using Details::Behavior;
6999  using Details::ProfilingRegion;
7000  using Teuchos::outArg;
7001  using Teuchos::REDUCE_MAX;
7002  using Teuchos::reduceAll;
7003  using std::endl;
7004  typedef LocalOrdinal LO;
7005  typedef GlobalOrdinal GO;
7006  const char tfecfFuncName[] = "packAndPrepare: ";
7007  ProfilingRegion regionPAP ("Tpetra::CrsMatrix::packAndPrepare");
7008 
7009  const bool debug = Behavior::debug("CrsMatrix");
7010  const bool verbose = Behavior::verbose("CrsMatrix");
7011 
7012  // Processes on which the communicator is null should not participate.
7013  Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
7014  if (pComm.is_null ()) {
7015  return;
7016  }
7017  const Teuchos::Comm<int>& comm = *pComm;
7018  const int myRank = comm.getSize ();
7019 
7020  std::unique_ptr<std::string> prefix;
7021  if (verbose) {
7022  prefix = this->createPrefix("CrsMatrix", "packAndPrepare");
7023  std::ostringstream os;
7024  os << *prefix << "Start" << endl
7025  << *prefix << " "
7026  << dualViewStatusToString (exportLIDs, "exportLIDs")
7027  << endl
7028  << *prefix << " "
7029  << dualViewStatusToString (exports, "exports")
7030  << endl
7031  << *prefix << " "
7032  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7033  << endl;
7034  std::cerr << os.str ();
7035  }
7036 
7037  // Attempt to cast the source object to CrsMatrix. If successful,
7038  // use the source object's packNew() method to pack its data for
7039  // communication. Otherwise, attempt to cast to RowMatrix; if
7040  // successful, use the source object's pack() method. Otherwise,
7041  // the source object doesn't have the right type.
7042  //
7043  // FIXME (mfh 30 Jun 2013, 11 Sep 2017) We don't even need the
7044  // RowMatrix to have the same Node type. Unfortunately, we don't
7045  // have a way to ask if the RowMatrix is "a RowMatrix with any
7046  // Node type," since RowMatrix doesn't have a base class. A
7047  // hypothetical RowMatrixBase<Scalar, LO, GO> class, which does
7048  // not currently exist, would satisfy this requirement.
7049  //
7050  // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
7051  // type doesn't technically need to match the target object's
7052  // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
7053  // and GO need not be the same, as long as there is no overflow of
7054  // the indices. However, checking for index overflow is global
7055  // and therefore undesirable.
7056 
7057  std::ostringstream msg; // for collecting error messages
7058  int lclBad = 0; // to be set below
7059 
7060  using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
7061  const crs_matrix_type* srcCrsMat =
7062  dynamic_cast<const crs_matrix_type*> (&source);
7063  if (srcCrsMat != nullptr) {
7064  if (verbose) {
7065  std::ostringstream os;
7066  os << *prefix << "Source matrix same (CrsMatrix) type as target; "
7067  "calling packNew" << endl;
7068  std::cerr << os.str ();
7069  }
7070  try {
7071  srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
7072  constantNumPackets, distor);
7073  }
7074  catch (std::exception& e) {
7075  lclBad = 1;
7076  msg << "Proc " << myRank << ": " << e.what () << std::endl;
7077  }
7078  }
7079  else {
7080  using Kokkos::HostSpace;
7081  using Kokkos::subview;
7082  using exports_type = Kokkos::DualView<char*, buffer_device_type>;
7083  using range_type = Kokkos::pair<size_t, size_t>;
7084 
7085  if (verbose) {
7086  std::ostringstream os;
7087  os << *prefix << "Source matrix NOT same (CrsMatrix) type as target"
7088  << endl;
7089  std::cerr << os.str ();
7090  }
7091 
7092  using row_matrix_type = RowMatrix<Scalar, LO, GO, Node>;
7093  const row_matrix_type* srcRowMat =
7094  dynamic_cast<const row_matrix_type*> (&source);
7095  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7096  (srcRowMat == nullptr, std::invalid_argument,
7097  "The source object of the Import or Export operation is neither a "
7098  "CrsMatrix (with the same template parameters as the target object), "
7099  "nor a RowMatrix (with the same first four template parameters as the "
7100  "target object).");
7101 
7102  // For the RowMatrix case, we need to convert from
7103  // Kokkos::DualView to Teuchos::Array*. This doesn't need to be
7104  // so terribly efficient, since packing a non-CrsMatrix
7105  // RowMatrix for Import/Export into a CrsMatrix is not a
7106  // critical case. Thus, we may allocate Teuchos::Array objects
7107  // here and copy to and from Kokkos::*View.
7108 
7109  // View exportLIDs's host data as a Teuchos::ArrayView.
7110  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
7111  auto exportLIDs_h = exportLIDs.view_host ();
7112  Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
7113  exportLIDs_h.size ());
7114 
7115  // pack() will allocate exports_a as needed. We'll copy back
7116  // into exports (after (re)allocating exports if needed) below.
7117  Teuchos::Array<char> exports_a;
7118 
7119  // View exportLIDs' host data as a Teuchos::ArrayView. We don't
7120  // need to sync, since we're doing write-only access, but we do
7121  // need to mark the DualView as modified on host.
7122 
7123  numPacketsPerLID.clear_sync_state (); // write-only access
7124  numPacketsPerLID.modify_host ();
7125  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
7126  Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
7127  numPacketsPerLID_h.size ());
7128 
7129  // Invoke RowMatrix's legacy pack() interface, using above
7130  // Teuchos::Array* objects.
7131  try {
7132  srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
7133  constantNumPackets, distor);
7134  }
7135  catch (std::exception& e) {
7136  lclBad = 1;
7137  msg << "Proc " << myRank << ": " << e.what () << std::endl;
7138  }
7139 
7140  // Allocate 'exports', and copy exports_a back into it.
7141  const size_t newAllocSize = static_cast<size_t> (exports_a.size ());
7142  if (static_cast<size_t> (exports.extent (0)) < newAllocSize) {
7143  const std::string oldLabel = exports.d_view.label ();
7144  const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
7145  exports = exports_type (newLabel, newAllocSize);
7146  }
7147  // It's safe to assume that we're working on host anyway, so
7148  // just keep exports sync'd to host.
7149  // ignore current device contents
7150  exports.modify_host();
7151 
7152  auto exports_h = exports.view_host ();
7153  auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
7154 
7155  // Kokkos::deep_copy needs a Kokkos::View input, so turn
7156  // exports_a into a nonowning Kokkos::View first before copying.
7157  typedef typename exports_type::t_host::execution_space HES;
7158  typedef Kokkos::Device<HES, HostSpace> host_device_type;
7159  Kokkos::View<const char*, host_device_type>
7160  exports_a_kv (exports_a.getRawPtr (), newAllocSize);
7161  Kokkos::deep_copy (exports_h_sub, exports_a_kv);
7162  }
7163 
7164  if (debug) {
7165  int gblBad = 0; // output argument; to be set below
7166  reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
7167  if (gblBad != 0) {
7168  Tpetra::Details::gathervPrint (std::cerr, msg.str (), comm);
7169  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7170  (true, std::logic_error, "packNew() or pack() threw an exception on "
7171  "one or more participating processes.");
7172  }
7173  }
7174  else {
7175  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7176  (lclBad != 0, std::logic_error, "packNew threw an exception on one "
7177  "or more participating processes. Here is this process' error "
7178  "message: " << msg.str ());
7179  }
7180 
7181  if (verbose) {
7182  std::ostringstream os;
7183  os << *prefix << "packAndPrepare: Done!" << endl
7184  << *prefix << " "
7185  << dualViewStatusToString (exportLIDs, "exportLIDs")
7186  << endl
7187  << *prefix << " "
7188  << dualViewStatusToString (exports, "exports")
7189  << endl
7190  << *prefix << " "
7191  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7192  << endl;
7193  std::cerr << os.str ();
7194  }
7195  }
7196 
7197  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7198  size_t
7199  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7200  packRow (char exports[],
7201  const size_t offset,
7202  const size_t numEnt,
7203  const GlobalOrdinal gidsIn[],
7204  const impl_scalar_type valsIn[],
7205  const size_t numBytesPerValue) const
7206  {
7207  using Kokkos::View;
7208  using Kokkos::subview;
7210  typedef LocalOrdinal LO;
7211  typedef GlobalOrdinal GO;
7212  typedef impl_scalar_type ST;
7213 
7214  if (numEnt == 0) {
7215  // Empty rows always take zero bytes, to ensure sparsity.
7216  return 0;
7217  }
7218 
7219  const GO gid = 0; // packValueCount wants this
7220  const LO numEntLO = static_cast<size_t> (numEnt);
7221 
7222  const size_t numEntBeg = offset;
7223  const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
7224  const size_t gidsBeg = numEntBeg + numEntLen;
7225  const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
7226  const size_t valsBeg = gidsBeg + gidsLen;
7227  const size_t valsLen = numEnt * numBytesPerValue;
7228 
7229  char* const numEntOut = exports + numEntBeg;
7230  char* const gidsOut = exports + gidsBeg;
7231  char* const valsOut = exports + valsBeg;
7232 
7233  size_t numBytesOut = 0;
7234  int errorCode = 0;
7235  numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
7236 
7237  {
7238  Kokkos::pair<int, size_t> p;
7239  p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
7240  errorCode += p.first;
7241  numBytesOut += p.second;
7242 
7243  p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
7244  errorCode += p.first;
7245  numBytesOut += p.second;
7246  }
7247 
7248  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
7249  TEUCHOS_TEST_FOR_EXCEPTION
7250  (numBytesOut != expectedNumBytes, std::logic_error, "packRow: "
7251  "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
7252  << expectedNumBytes << ".");
7253  TEUCHOS_TEST_FOR_EXCEPTION
7254  (errorCode != 0, std::runtime_error, "packRow: "
7255  "PackTraits::packArray returned a nonzero error code");
7256 
7257  return numBytesOut;
7258  }
7259 
7260  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7261  size_t
7262  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7263  unpackRow (GlobalOrdinal gidsOut[],
7264  impl_scalar_type valsOut[],
7265  const char imports[],
7266  const size_t offset,
7267  const size_t numBytes,
7268  const size_t numEnt,
7269  const size_t numBytesPerValue)
7270  {
7271  using Kokkos::View;
7272  using Kokkos::subview;
7274  typedef LocalOrdinal LO;
7275  typedef GlobalOrdinal GO;
7276  typedef impl_scalar_type ST;
7277 
7278  Details::ProfilingRegion region_upack_row(
7279  "Tpetra::CrsMatrix::unpackRow",
7280  "Import/Export"
7281  );
7282 
7283  if (numBytes == 0) {
7284  // Rows with zero bytes should always have zero entries.
7285  if (numEnt != 0) {
7286  const int myRank = this->getMap ()->getComm ()->getRank ();
7287  TEUCHOS_TEST_FOR_EXCEPTION
7288  (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
7289  "unpackRow: The number of bytes to unpack numBytes=0, but the "
7290  "number of entries to unpack (as reported by numPacketsPerLID) "
7291  "for this row numEnt=" << numEnt << " != 0.");
7292  }
7293  return 0;
7294  }
7295 
7296  if (numEnt == 0 && numBytes != 0) {
7297  const int myRank = this->getMap ()->getComm ()->getRank ();
7298  TEUCHOS_TEST_FOR_EXCEPTION
7299  (true, std::logic_error, "(Proc " << myRank << ") CrsMatrix::"
7300  "unpackRow: The number of entries to unpack (as reported by "
7301  "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
7302  "numBytes=" << numBytes << " != 0.");
7303  }
7304 
7305  const GO gid = 0; // packValueCount wants this
7306  const LO lid = 0; // packValueCount wants this
7307 
7308  const size_t numEntBeg = offset;
7309  const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
7310  const size_t gidsBeg = numEntBeg + numEntLen;
7311  const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
7312  const size_t valsBeg = gidsBeg + gidsLen;
7313  const size_t valsLen = numEnt * numBytesPerValue;
7314 
7315  const char* const numEntIn = imports + numEntBeg;
7316  const char* const gidsIn = imports + gidsBeg;
7317  const char* const valsIn = imports + valsBeg;
7318 
7319  size_t numBytesOut = 0;
7320  int errorCode = 0;
7321  LO numEntOut;
7322  numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
7323  if (static_cast<size_t> (numEntOut) != numEnt ||
7324  numEntOut == static_cast<LO> (0)) {
7325  const int myRank = this->getMap ()->getComm ()->getRank ();
7326  std::ostringstream os;
7327  os << "(Proc " << myRank << ") CrsMatrix::unpackRow: ";
7328  bool firstErrorCondition = false;
7329  if (static_cast<size_t> (numEntOut) != numEnt) {
7330  os << "Number of entries from numPacketsPerLID numEnt=" << numEnt
7331  << " does not equal number of entries unpacked from imports "
7332  "buffer numEntOut=" << numEntOut << ".";
7333  firstErrorCondition = true;
7334  }
7335  if (numEntOut == static_cast<LO> (0)) {
7336  if (firstErrorCondition) {
7337  os << " Also, ";
7338  }
7339  os << "Number of entries unpacked from imports buffer numEntOut=0, "
7340  "but number of bytes to unpack for this row numBytes=" << numBytes
7341  << " != 0. This should never happen, since packRow should only "
7342  "ever pack rows with a nonzero number of entries. In this case, "
7343  "the number of entries from numPacketsPerLID is numEnt=" << numEnt
7344  << ".";
7345  }
7346  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, os.str ());
7347  }
7348 
7349  {
7350  Kokkos::pair<int, size_t> p;
7351  p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
7352  errorCode += p.first;
7353  numBytesOut += p.second;
7354 
7355  p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
7356  errorCode += p.first;
7357  numBytesOut += p.second;
7358  }
7359 
7360  TEUCHOS_TEST_FOR_EXCEPTION
7361  (numBytesOut != numBytes, std::logic_error, "unpackRow: numBytesOut = "
7362  << numBytesOut << " != numBytes = " << numBytes << ".");
7363 
7364  const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
7365  TEUCHOS_TEST_FOR_EXCEPTION
7366  (numBytesOut != expectedNumBytes, std::logic_error, "unpackRow: "
7367  "numBytesOut = " << numBytesOut << " != expectedNumBytes = "
7368  << expectedNumBytes << ".");
7369 
7370  TEUCHOS_TEST_FOR_EXCEPTION
7371  (errorCode != 0, std::runtime_error, "unpackRow: "
7372  "PackTraits::unpackArray returned a nonzero error code");
7373 
7374  return numBytesOut;
7375  }
7376 
7377  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7378  void
7379  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7380  allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
7381  size_t& totalNumEntries,
7382  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs) const
7383  {
7384  using Details::Behavior;
7386  using std::endl;
7387  typedef impl_scalar_type IST;
7388  typedef LocalOrdinal LO;
7389  typedef GlobalOrdinal GO;
7390  //const char tfecfFuncName[] = "allocatePackSpaceNew: ";
7391 
7392  // mfh 18 Oct 2017: Set TPETRA_VERBOSE to true for copious debug
7393  // output to std::cerr on every MPI process. This is unwise for
7394  // runs with large numbers of MPI processes.
7395  const bool verbose = Behavior::verbose("CrsMatrix");
7396  std::unique_ptr<std::string> prefix;
7397  if (verbose) {
7398  prefix = this->createPrefix("CrsMatrix", "allocatePackSpaceNew");
7399  std::ostringstream os;
7400  os << *prefix << "Before:"
7401  << endl
7402  << *prefix << " "
7403  << dualViewStatusToString (exports, "exports")
7404  << endl
7405  << *prefix << " "
7406  << dualViewStatusToString (exportLIDs, "exportLIDs")
7407  << endl;
7408  std::cerr << os.str ();
7409  }
7410 
7411  // The number of export LIDs must fit in LocalOrdinal, assuming
7412  // that the LIDs are distinct and valid on the calling process.
7413  const LO numExportLIDs = static_cast<LO> (exportLIDs.extent (0));
7414 
7415  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
7416  auto exportLIDs_h = exportLIDs.view_host ();
7417 
7418  // Count the total number of matrix entries to send.
7419  totalNumEntries = 0;
7420  for (LO i = 0; i < numExportLIDs; ++i) {
7421  const LO lclRow = exportLIDs_h[i];
7422  size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
7423  // FIXME (mfh 25 Jan 2015) We should actually report invalid row
7424  // indices as an error. Just consider them nonowned for now.
7425  if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
7426  curNumEntries = 0;
7427  }
7428  totalNumEntries += curNumEntries;
7429  }
7430 
7431  // FIXME (mfh 24 Feb 2013, 24 Mar 2017) This code is only correct
7432  // if sizeof(IST) is a meaningful representation of the amount of
7433  // data in a Scalar instance. (LO and GO are always built-in
7434  // integer types.)
7435  //
7436  // Allocate the exports array. It does NOT need padding for
7437  // alignment, since we use memcpy to write to / read from send /
7438  // receive buffers.
7439  const size_t allocSize =
7440  static_cast<size_t> (numExportLIDs) * sizeof (LO) +
7441  totalNumEntries * (sizeof (IST) + sizeof (GO));
7442  if (static_cast<size_t> (exports.extent (0)) < allocSize) {
7443  using exports_type = Kokkos::DualView<char*, buffer_device_type>;
7444 
7445  const std::string oldLabel = exports.d_view.label ();
7446  const std::string newLabel = (oldLabel == "") ? "exports" : oldLabel;
7447  exports = exports_type (newLabel, allocSize);
7448  }
7449 
7450  if (verbose) {
7451  std::ostringstream os;
7452  os << *prefix << "After:"
7453  << endl
7454  << *prefix << " "
7455  << dualViewStatusToString (exports, "exports")
7456  << endl
7457  << *prefix << " "
7458  << dualViewStatusToString (exportLIDs, "exportLIDs")
7459  << endl;
7460  std::cerr << os.str ();
7461  }
7462  }
7463 
7464  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7465  void
7467  packNew (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
7468  Kokkos::DualView<char*, buffer_device_type>& exports,
7469  const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7470  size_t& constantNumPackets,
7471  Distributor& dist) const
7472  {
7473  // The call to packNew in packAndPrepare catches and handles any exceptions.
7474  Details::ProfilingRegion region_pack_new("Tpetra::CrsMatrix::packNew", "Import/Export");
7475  if (this->isStaticGraph ()) {
7477  packCrsMatrixNew (*this, exports, numPacketsPerLID, exportLIDs,
7478  constantNumPackets, dist);
7479  }
7480  else {
7481  this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
7482  constantNumPackets, dist);
7483  }
7484  }
7485 
7486  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7487  void
7489  packNonStaticNew (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
7490  Kokkos::DualView<char*, buffer_device_type>& exports,
7491  const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
7492  size_t& constantNumPackets,
7493  Distributor& /* distor */) const
7494  {
7495  using Details::Behavior;
7497  using Details::PackTraits;
7499  using Kokkos::View;
7500  using std::endl;
7501  using LO = LocalOrdinal;
7502  using GO = GlobalOrdinal;
7503  using ST = impl_scalar_type;
7504  using HES =
7505  typename View<int*, device_type>::HostMirror::execution_space;
7506  const char tfecfFuncName[] = "packNonStaticNew: ";
7507 
7508  const bool verbose = Behavior::verbose("CrsMatrix");
7509  std::unique_ptr<std::string> prefix;
7510  if (verbose) {
7511  prefix = this->createPrefix("CrsMatrix", "packNonStaticNew");
7512  std::ostringstream os;
7513  os << *prefix << "Start" << endl;
7514  std::cerr << os.str ();
7515  }
7516 
7517  const size_t numExportLIDs = static_cast<size_t> (exportLIDs.extent (0));
7518  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7519  (numExportLIDs != static_cast<size_t> (numPacketsPerLID.extent (0)),
7520  std::invalid_argument, "exportLIDs.size() = " << numExportLIDs
7521  << " != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
7522  << ".");
7523 
7524  // Setting this to zero tells the caller to expect a possibly
7525  // different ("nonconstant") number of packets per local index
7526  // (i.e., a possibly different number of entries per row).
7527  constantNumPackets = 0;
7528 
7529  // The pack buffer 'exports' enters this method possibly
7530  // unallocated. Do the first two parts of "Count, allocate, fill,
7531  // compute."
7532  size_t totalNumEntries = 0;
7533  this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
7534  const size_t bufSize = static_cast<size_t> (exports.extent (0));
7535 
7536  // Write-only host access
7537  exports.clear_sync_state();
7538  exports.modify_host();
7539  auto exports_h = exports.view_host ();
7540  if (verbose) {
7541  std::ostringstream os;
7542  os << *prefix << "After marking exports as modified on host, "
7543  << dualViewStatusToString (exports, "exports") << endl;
7544  std::cerr << os.str ();
7545  }
7546 
7547  // Read-only host access
7548  auto exportLIDs_h = exportLIDs.view_host ();
7549 
7550  // Write-only host access
7551  const_cast<Kokkos::DualView<size_t*, buffer_device_type>*>(&numPacketsPerLID)->clear_sync_state();
7552  const_cast<Kokkos::DualView<size_t*, buffer_device_type>*>(&numPacketsPerLID)->modify_host();
7553  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
7554 
7555  // Compute the number of "packets" (in this case, bytes) per
7556  // export LID (in this case, local index of the row to send), and
7557  // actually pack the data.
7558  size_t offset = 0; // current index into 'exports' array.
7559  for (size_t i = 0; i < numExportLIDs; ++i) {
7560  const LO lclRow = exportLIDs_h[i];
7561 
7562  size_t numEnt;
7563  numEnt = this->getNumEntriesInLocalRow (lclRow);
7564 
7565  // Only pack this row's data if it has a nonzero number of
7566  // entries. We can do this because receiving processes get the
7567  // number of packets, and will know that zero packets means zero
7568  // entries.
7569  if (numEnt == 0) {
7570  numPacketsPerLID_h[i] = 0;
7571  continue;
7572  }
7573 
7574  // Temporary buffer for global column indices.
7575  using Details::ScalarViewTraits;
7576  View<GO*, HES> gidsIn_k =
7577  ScalarViewTraits<GO, HES>::allocateArray (GO (0), numEnt, "gids");
7578 
7579  Teuchos::ArrayView<const Scalar> valsIn;
7580  if (this->isLocallyIndexed ()) {
7581  // If the matrix is locally indexed on the calling process, we
7582  // have to use its column Map (which it _must_ have in this
7583  // case) to convert to global indices.
7584  Teuchos::ArrayView<const LO> lidsIn;
7585  this->getLocalRowView (lclRow, lidsIn, valsIn);
7586  const map_type& colMap = * (this->getColMap ());
7587  for (size_t k = 0; k < numEnt; ++k) {
7588  gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
7589  }
7590  }
7591  else if (this->isGloballyIndexed ()) {
7592  // If the matrix is globally indexed on the calling process,
7593  // then we can use the column indices directly. However, we
7594  // have to get the global row index. The calling process must
7595  // have a row Map, since otherwise it shouldn't be participating
7596  // in packing operations.
7597  Teuchos::ArrayView<const GO> gblIndView;;
7598  const map_type& rowMap = * (this->getRowMap ());
7599  const GO gblRow = rowMap.getGlobalElement (lclRow);
7600  this->getGlobalRowView (gblRow, gblIndView, valsIn);
7601  for (size_t k = 0; k < numEnt; ++k) {
7602  gidsIn_k[k] = gblIndView[k];
7603  }
7604  }
7605  // mfh 11 Sep 2017: Currently, if the matrix is neither globally
7606  // nor locally indexed, then it has no entries. Therefore,
7607  // there is nothing to pack. No worries!
7608 
7609  typename HES::device_type outputDevice;
7610  auto valsIn_k =
7612  reinterpret_cast<const ST*> (valsIn.getRawPtr ()),
7613  valsIn.size (),
7614  true, "valsIn");
7615  const size_t numBytesPerValue =
7616  PackTraits<ST>::packValueCount (valsIn[0]);
7617  const size_t numBytes =
7618  this->packRow (exports_h.data (), offset, numEnt, gidsIn_k.data (),
7619  valsIn_k.data (), numBytesPerValue);
7620  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7621  (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
7622  "First invalid offset into 'exports' pack buffer at index i = " << i
7623  << ". exportLIDs_h[i]: " << exportLIDs_h[i] << ", bufSize: " <<
7624  bufSize << ", offset: " << offset << ", numBytes: " << numBytes <<
7625  ".");
7626  // numPacketsPerLID_h[i] is the number of "packets" in the
7627  // current local row i. Packet=char (really "byte") so use the
7628  // number of bytes of the packed data for that row.
7629  numPacketsPerLID_h[i] = numBytes;
7630  offset += numBytes;
7631  }
7632 
7633  if (verbose) {
7634  std::ostringstream os;
7635  os << *prefix << "Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
7636  << *prefix << " "
7637  << dualViewStatusToString (exports, "exports")
7638  << endl
7639  << *prefix << " "
7640  << dualViewStatusToString (exportLIDs, "exportLIDs")
7641  << endl;
7642  std::cerr << os.str ();
7643  }
7644  }
7645 
7646  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7647  LocalOrdinal
7648  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7649  combineGlobalValuesRaw(const LocalOrdinal lclRow,
7650  const LocalOrdinal numEnt,
7651  const impl_scalar_type vals[],
7652  const GlobalOrdinal cols[],
7653  const Tpetra::CombineMode combMode,
7654  const char* const prefix,
7655  const bool debug,
7656  const bool verbose)
7657  {
7658  using GO = GlobalOrdinal;
7659 
7660  // mfh 23 Mar 2017: This branch is not thread safe in a debug
7661  // build, due to use of Teuchos::ArrayView; see #229.
7662  const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
7663  Teuchos::ArrayView<const GO> cols_av
7664  (numEnt == 0 ? nullptr : cols, numEnt);
7665  Teuchos::ArrayView<const Scalar> vals_av
7666  (numEnt == 0 ? nullptr : reinterpret_cast<const Scalar*> (vals), numEnt);
7667 
7668  // FIXME (mfh 23 Mar 2017) This is a work-around for less common
7669  // combine modes. combineGlobalValues throws on error; it does
7670  // not return an error code. Thus, if it returns, it succeeded.
7671  combineGlobalValues(gblRow, cols_av, vals_av, combMode,
7672  prefix, debug, verbose);
7673  return numEnt;
7674  }
7675 
7676  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7677  void
7678  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7679  combineGlobalValues(
7680  const GlobalOrdinal globalRowIndex,
7681  const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
7682  const Teuchos::ArrayView<const Scalar>& values,
7683  const Tpetra::CombineMode combineMode,
7684  const char* const prefix,
7685  const bool debug,
7686  const bool verbose)
7687  {
7688  const char tfecfFuncName[] = "combineGlobalValues: ";
7689 
7690  if (isStaticGraph ()) {
7691  // INSERT doesn't make sense for a static graph, since you
7692  // aren't allowed to change the structure of the graph.
7693  // However, all the other combine modes work.
7694  if (combineMode == ADD) {
7695  sumIntoGlobalValues (globalRowIndex, columnIndices, values);
7696  }
7697  else if (combineMode == REPLACE) {
7698  replaceGlobalValues (globalRowIndex, columnIndices, values);
7699  }
7700  else if (combineMode == ABSMAX) {
7701  using ::Tpetra::Details::AbsMax;
7702  AbsMax<Scalar> f;
7703  this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
7704  columnIndices,
7705  values, f);
7706  }
7707  else if (combineMode == INSERT) {
7708  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7709  (isStaticGraph() && combineMode == INSERT,
7710  std::invalid_argument, "INSERT combine mode is forbidden "
7711  "if the matrix has a static (const) graph (i.e., was "
7712  "constructed with the CrsMatrix constructor that takes a "
7713  "const CrsGraph pointer).");
7714  }
7715  else {
7716  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7717  (true, std::logic_error, "Invalid combine mode; should "
7718  "never get here! "
7719  "Please report this bug to the Tpetra developers.");
7720  }
7721  }
7722  else { // The matrix has a dynamic graph.
7723  if (combineMode == ADD || combineMode == INSERT) {
7724  // For a dynamic graph, all incoming column indices are
7725  // inserted into the target graph. Duplicate indices will
7726  // have their values summed. In this context, ADD and INSERT
7727  // are equivalent. We need to call insertGlobalValues()
7728  // anyway if the column indices don't yet exist in this row,
7729  // so we just call insertGlobalValues() for both cases.
7730  insertGlobalValuesFilteredChecked(globalRowIndex,
7731  columnIndices, values, prefix, debug, verbose);
7732  }
7733  // FIXME (mfh 14 Mar 2012):
7734  //
7735  // Implementing ABSMAX or REPLACE for a dynamic graph would
7736  // require modifying assembly to attach a possibly different
7737  // combine mode to each inserted (i, j, A_ij) entry. For
7738  // example, consider two different Export operations to the same
7739  // target CrsMatrix, the first with ABSMAX combine mode and the
7740  // second with REPLACE. This isn't a common use case, so we
7741  // won't mess with it for now.
7742  else if (combineMode == ABSMAX) {
7743  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7744  ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
7745  "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
7746  "implemented.");
7747  }
7748  else if (combineMode == REPLACE) {
7749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7750  ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
7751  "REPLACE combine mode when the matrix has a dynamic graph is not yet "
7752  "implemented.");
7753  }
7754  else {
7755  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
7756  true, std::logic_error, "Should never get here! Please report this "
7757  "bug to the Tpetra developers.");
7758  }
7759  }
7760  }
7761 
7762  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7763  void
7766  (const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
7767  Kokkos::DualView<char*, buffer_device_type> imports,
7768  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7769  const size_t constantNumPackets,
7770  Distributor& distor,
7771  const CombineMode combineMode)
7772  {
7773  using Details::Behavior;
7776  using std::endl;
7777  const char tfecfFuncName[] = "unpackAndCombine: ";
7778  ProfilingRegion regionUAC ("Tpetra::CrsMatrix::unpackAndCombine");
7779 
7780  const bool debug = Behavior::debug("CrsMatrix");
7781  const bool verbose = Behavior::verbose("CrsMatrix");
7782  constexpr int numValidModes = 5;
7783  const CombineMode validModes[numValidModes] =
7784  {ADD, REPLACE, ABSMAX, INSERT, ZERO};
7785  const char* validModeNames[numValidModes] =
7786  {"ADD", "REPLACE", "ABSMAX", "INSERT", "ZERO"};
7787 
7788  std::unique_ptr<std::string> prefix;
7789  if (verbose) {
7790  prefix = this->createPrefix("CrsMatrix", "unpackAndCombine");
7791  std::ostringstream os;
7792  os << *prefix << "Start:" << endl
7793  << *prefix << " "
7794  << dualViewStatusToString (importLIDs, "importLIDs")
7795  << endl
7796  << *prefix << " "
7797  << dualViewStatusToString (imports, "imports")
7798  << endl
7799  << *prefix << " "
7800  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7801  << endl
7802  << *prefix << " constantNumPackets: " << constantNumPackets
7803  << endl
7804  << *prefix << " combineMode: " << combineModeToString (combineMode)
7805  << endl;
7806  std::cerr << os.str ();
7807  }
7808 
7809  if (debug) {
7810  if (std::find (validModes, validModes+numValidModes, combineMode) ==
7811  validModes+numValidModes) {
7812  std::ostringstream os;
7813  os << "Invalid combine mode. Valid modes are {";
7814  for (int k = 0; k < numValidModes; ++k) {
7815  os << validModeNames[k];
7816  if (k < numValidModes - 1) {
7817  os << ", ";
7818  }
7819  }
7820  os << "}.";
7821  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7822  (true, std::invalid_argument, os.str ());
7823  }
7824  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7825  (importLIDs.extent(0) != numPacketsPerLID.extent(0),
7826  std::invalid_argument, "importLIDs.extent(0)="
7827  << importLIDs.extent(0)
7828  << " != numPacketsPerLID.extent(0)="
7829  << numPacketsPerLID.extent(0) << ".");
7830  }
7831 
7832  if (combineMode == ZERO) {
7833  return; // nothing to do
7834  }
7835 
7836  if (debug) {
7837  using Teuchos::reduceAll;
7838  std::unique_ptr<std::ostringstream> msg (new std::ostringstream ());
7839  int lclBad = 0;
7840  try {
7841  unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7842  constantNumPackets, distor, combineMode,
7843  verbose);
7844  } catch (std::exception& e) {
7845  lclBad = 1;
7846  *msg << e.what ();
7847  }
7848  int gblBad = 0;
7849  const Teuchos::Comm<int>& comm = * (this->getComm ());
7850  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
7851  lclBad, Teuchos::outArg (gblBad));
7852  if (gblBad != 0) {
7853  // mfh 22 Oct 2017: 'prefix' might be null, since it is only
7854  // initialized in a debug build. Thus, we get the process
7855  // rank again here. This is an error message, so the small
7856  // run-time cost doesn't matter. See #1887.
7857  std::ostringstream os;
7858  os << "Proc " << comm.getRank () << ": " << msg->str () << endl;
7859  msg = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
7860  ::Tpetra::Details::gathervPrint (*msg, os.str (), comm);
7861  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7862  (true, std::logic_error, std::endl << "unpackAndCombineImpl "
7863  "threw an exception on one or more participating processes: "
7864  << endl << msg->str ());
7865  }
7866  }
7867  else {
7868  unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
7869  constantNumPackets, distor, combineMode,
7870  verbose);
7871  }
7872 
7873  if (verbose) {
7874  std::ostringstream os;
7875  os << *prefix << "Done!" << endl
7876  << *prefix << " "
7877  << dualViewStatusToString (importLIDs, "importLIDs")
7878  << endl
7879  << *prefix << " "
7880  << dualViewStatusToString (imports, "imports")
7881  << endl
7882  << *prefix << " "
7883  << dualViewStatusToString (numPacketsPerLID, "numPacketsPerLID")
7884  << endl;
7885  std::cerr << os.str ();
7886  }
7887  }
7888 
7889  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7890  void
7893  const Kokkos::DualView<const local_ordinal_type*,
7894  buffer_device_type>& importLIDs,
7895  Kokkos::DualView<char*, buffer_device_type> imports,
7896  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7897  const size_t constantNumPackets,
7898  Distributor & distor,
7899  const CombineMode combineMode,
7900  const bool verbose)
7901  {
7902  Details::ProfilingRegion region_unpack_and_combine_impl(
7903  "Tpetra::CrsMatrix::unpackAndCombineImpl",
7904  "Import/Export"
7905  );
7906  using std::endl;
7907  const char tfecfFuncName[] = "unpackAndCombineImpl";
7908  std::unique_ptr<std::string> prefix;
7909  if (verbose) {
7910  prefix = this->createPrefix("CrsMatrix", tfecfFuncName);
7911  std::ostringstream os;
7912  os << *prefix << "isStaticGraph(): "
7913  << (isStaticGraph() ? "true" : "false")
7914  << ", importLIDs.extent(0): "
7915  << importLIDs.extent(0)
7916  << ", imports.extent(0): "
7917  << imports.extent(0)
7918  << ", numPacketsPerLID.extent(0): "
7919  << numPacketsPerLID.extent(0)
7920  << endl;
7921  std::cerr << os.str();
7922  }
7923 
7924  if (isStaticGraph ()) {
7925  using Details::unpackCrsMatrixAndCombineNew;
7926  unpackCrsMatrixAndCombineNew(*this, imports, numPacketsPerLID,
7927  importLIDs, constantNumPackets,
7928  distor, combineMode);
7929  }
7930  else {
7931  {
7932  using padding_type = typename crs_graph_type::padding_type;
7933  std::unique_ptr<padding_type> padding;
7934  try {
7935  padding = myGraph_->computePaddingForCrsMatrixUnpack(
7936  importLIDs, imports, numPacketsPerLID, verbose);
7937  }
7938  catch (std::exception& e) {
7939  const auto rowMap = getRowMap();
7940  const auto comm = rowMap.is_null() ? Teuchos::null :
7941  rowMap->getComm();
7942  const int myRank = comm.is_null() ? -1 : comm->getRank();
7943  TEUCHOS_TEST_FOR_EXCEPTION
7944  (true, std::runtime_error, "Proc " << myRank << ": "
7945  "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7946  "threw an exception: " << e.what());
7947  }
7948  if (verbose) {
7949  std::ostringstream os;
7950  os << *prefix << "Call applyCrsPadding" << endl;
7951  std::cerr << os.str();
7952  }
7953  applyCrsPadding(*padding, verbose);
7954  }
7955  if (verbose) {
7956  std::ostringstream os;
7957  os << *prefix << "Call unpackAndCombineImplNonStatic" << endl;
7958  std::cerr << os.str();
7959  }
7960  unpackAndCombineImplNonStatic(importLIDs, imports,
7961  numPacketsPerLID,
7962  constantNumPackets,
7963  distor, combineMode);
7964  }
7965 
7966  if (verbose) {
7967  std::ostringstream os;
7968  os << *prefix << "Done" << endl;
7969  std::cerr << os.str();
7970  }
7971  }
7972 
7973  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
7974  void
7975  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7976  unpackAndCombineImplNonStatic(
7977  const Kokkos::DualView<const local_ordinal_type*,
7978  buffer_device_type>& importLIDs,
7979  Kokkos::DualView<char*, buffer_device_type> imports,
7980  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7981  const size_t constantNumPackets,
7982  Distributor& distor,
7983  const CombineMode combineMode)
7984  {
7985  using Kokkos::View;
7986  using Kokkos::subview;
7987  using Kokkos::MemoryUnmanaged;
7988  using Details::Behavior;
7991  using Details::PackTraits;
7992  using Details::ScalarViewTraits;
7993  using std::endl;
7994  using LO = LocalOrdinal;
7995  using GO = GlobalOrdinal;
7996  using ST = impl_scalar_type;
7997  using size_type = typename Teuchos::ArrayView<LO>::size_type;
7998  using HES =
7999  typename View<int*, device_type>::HostMirror::execution_space;
8000  using pair_type = std::pair<typename View<int*, HES>::size_type,
8001  typename View<int*, HES>::size_type>;
8002  using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
8003  using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
8004  const char tfecfFuncName[] = "unpackAndCombineImplNonStatic";
8005 
8006  const bool debug = Behavior::debug("CrsMatrix");
8007  const bool verbose = Behavior::verbose("CrsMatrix");
8008  std::unique_ptr<std::string> prefix;
8009  if (verbose) {
8010  prefix = this->createPrefix("CrsMatrix", tfecfFuncName);
8011  std::ostringstream os;
8012  os << *prefix << endl; // we've already printed DualViews' statuses
8013  std::cerr << os.str ();
8014  }
8015  const char* const prefix_raw =
8016  verbose ? prefix.get()->c_str() : nullptr;
8017 
8018  const size_type numImportLIDs = importLIDs.extent (0);
8019  if (combineMode == ZERO || numImportLIDs == 0) {
8020  return; // nothing to do; no need to combine entries
8021  }
8022 
8023  Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
8024  "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
8025  "Import/Export"
8026  );
8027 
8028  // We're unpacking on host. This is read-only host access.
8029  if (imports.need_sync_host()) {
8030  imports.sync_host ();
8031  }
8032  auto imports_h = imports.view_host();
8033 
8034  // Read-only host access.
8035  if (numPacketsPerLID.need_sync_host()) {
8036  numPacketsPerLID.sync_host ();
8037  }
8038  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
8039 
8040  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
8041  auto importLIDs_h = importLIDs.view_host();
8042 
8043  size_t numBytesPerValue;
8044  {
8045  // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
8046  // with run-time size? We already assume that all entries in both the
8047  // source and target matrices have the same size. If the calling process
8048  // owns at least one entry in either matrix, we can use that entry to set
8049  // the size. However, it is possible that the calling process owns no
8050  // entries. In that case, we're in trouble. One way to fix this would be
8051  // for each row's data to contain the run-time size. This is only
8052  // necessary if the size is not a compile-time constant.
8053  Scalar val;
8054  numBytesPerValue = PackTraits<ST>::packValueCount (val);
8055  }
8056 
8057  // Determine the maximum number of entries in any one row
8058  size_t offset = 0;
8059  size_t maxRowNumEnt = 0;
8060  for (size_type i = 0; i < numImportLIDs; ++i) {
8061  const size_t numBytes = numPacketsPerLID_h[i];
8062  if (numBytes == 0) {
8063  continue; // empty buffer for that row means that the row is empty
8064  }
8065  // We need to unpack a nonzero number of entries for this row.
8066  if (debug) {
8067  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
8068  (offset + numBytes > size_t(imports_h.extent (0)),
8069  std::logic_error, ": At local row index importLIDs_h[i="
8070  << i << "]=" << importLIDs_h[i] << ", offset (=" << offset
8071  << ") + numBytes (=" << numBytes << ") > "
8072  "imports_h.extent(0)=" << imports_h.extent (0) << ".");
8073  }
8074  LO numEntLO = 0;
8075 
8076  if (debug) {
8077  const size_t theNumBytes =
8078  PackTraits<LO>::packValueCount (numEntLO);
8079  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
8080  (theNumBytes > numBytes, std::logic_error, ": theNumBytes="
8081  << theNumBytes << " > numBytes = " << numBytes << ".");
8082  }
8083  const char* const inBuf = imports_h.data () + offset;
8084  const size_t actualNumBytes =
8085  PackTraits<LO>::unpackValue (numEntLO, inBuf);
8086 
8087  if (debug) {
8088  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
8089  (actualNumBytes > numBytes, std::logic_error, ": At i=" << i
8090  << ", actualNumBytes=" << actualNumBytes
8091  << " > numBytes=" << numBytes << ".");
8092  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
8093  (numEntLO == 0, std::logic_error, ": At local row index "
8094  "importLIDs_h[i=" << i << "]=" << importLIDs_h[i] << ", "
8095  "the number of entries read from the packed data is "
8096  "numEntLO=" << numEntLO << ", but numBytes=" << numBytes
8097  << " != 0.");
8098  }
8099 
8100  maxRowNumEnt = std::max(size_t(numEntLO), maxRowNumEnt);
8101  offset += numBytes;
8102  }
8103 
8104  // Temporary space to cache incoming global column indices and
8105  // values. Column indices come in as global indices, in case the
8106  // source object's column Map differs from the target object's
8107  // (this's) column Map.
8108  View<GO*, HES> gblColInds;
8109  View<LO*, HES> lclColInds;
8110  View<ST*, HES> vals;
8111  {
8112  GO gid = 0;
8113  LO lid = 0;
8114  // FIXME (mfh 17 Feb 2015, tjf 2 Aug 2017) What do I do about Scalar types
8115  // with run-time size? We already assume that all entries in both the
8116  // source and target matrices have the same size. If the calling process
8117  // owns at least one entry in either matrix, we can use that entry to set
8118  // the size. However, it is possible that the calling process owns no
8119  // entries. In that case, we're in trouble. One way to fix this would be
8120  // for each row's data to contain the run-time size. This is only
8121  // necessary if the size is not a compile-time constant.
8122  Scalar val;
8123  gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
8124  gid, maxRowNumEnt, "gids");
8125  lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
8126  lid, maxRowNumEnt, "lids");
8127  vals = ScalarViewTraits<ST, HES>::allocateArray(
8128  val, maxRowNumEnt, "vals");
8129  }
8130 
8131  offset = 0;
8132  for (size_type i = 0; i < numImportLIDs; ++i) {
8133  const size_t numBytes = numPacketsPerLID_h[i];
8134  if (numBytes == 0) {
8135  continue; // empty buffer for that row means that the row is empty
8136  }
8137  LO numEntLO = 0;
8138  const char* const inBuf = imports_h.data () + offset;
8139  (void) PackTraits<LO>::unpackValue (numEntLO, inBuf);
8140 
8141  const size_t numEnt = static_cast<size_t>(numEntLO);;
8142  const LO lclRow = importLIDs_h[i];
8143 
8144  gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
8145  vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
8146 
8147  const size_t numBytesOut =
8148  unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
8149  offset, numBytes, numEnt, numBytesPerValue);
8150  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
8151  (numBytes != numBytesOut, std::logic_error, ": At i=" << i
8152  << ", numBytes=" << numBytes << " != numBytesOut="
8153  << numBytesOut << ".");
8154 
8155  const ST* const valsRaw = const_cast<const ST*> (valsOut.data ());
8156  const GO* const gidsRaw = const_cast<const GO*> (gidsOut.data ());
8157  combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
8158  combineMode, prefix_raw, debug, verbose);
8159  // Don't update offset until current LID has succeeded.
8160  offset += numBytes;
8161  } // for each import LID i
8162 
8163  if (verbose) {
8164  std::ostringstream os;
8165  os << *prefix << "Done" << endl;
8166  std::cerr << os.str();
8167  }
8168  }
8169 
// Return Import workspace: a multivector distributed over this matrix's
// column Map, or null if no such workspace is needed.
//
// Throws std::runtime_error if the matrix has no column Map, or if its
// graph is not fill complete.
//
// If the graph's Import object is null and 'force' is false, the result
// is null. Otherwise the result has as many vectors as X_domainMap: the
// cached importMV_ is reused when it is nonnull and its vector count
// matches; if not, a new MV over the column Map is allocated and stored
// in importMV_ for later calls.
//
// X_domainMap: only its number of vectors is read here.
// force: if true, return a multivector even when the Import is null.
//
// NOTE(review): importMV_ is assigned inside a const method, so it is
// presumably declared mutable -- confirm against the class declaration.
8170  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8171  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
8173  getColumnMapMultiVector (const MV& X_domainMap,
8174  const bool force) const
8175  {
8176  using Teuchos::null;
8177  using Teuchos::RCP;
8178  using Teuchos::rcp;
8179 
8180  TEUCHOS_TEST_FOR_EXCEPTION(
8181  ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
8182  "MapMultiVector: You may only call this method if the matrix has a "
8183  "column Map. If the matrix does not yet have a column Map, you should "
8184  "first call fillComplete (with domain and range Map if necessary).");
8185 
8186  // If the graph is not fill complete, then the Import object (if
8187  // one should exist) hasn't been constructed yet.
8188  TEUCHOS_TEST_FOR_EXCEPTION(
8189  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
8190  "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
8191  "this matrix's graph is fill complete.");
8192 
  // The workspace must have the same number of vectors as the input.
8193  const size_t numVecs = X_domainMap.getNumVectors ();
8194  RCP<const import_type> importer = this->getGraph ()->getImporter ();
8195  RCP<const map_type> colMap = this->getColMap ();
8196 
8197  RCP<MV> X_colMap; // null by default
8198 
8199  // If the Import object is trivial (null), then we don't need a
8200  // separate column Map multivector. Just return null in that
8201  // case. The caller is responsible for knowing not to use the
8202  // returned null pointer.
8203  //
8204  // If the Import is nontrivial, then we do need a separate
8205  // column Map multivector for the Import operation. Check in
8206  // that case if we have to (re)create the column Map
8207  // multivector.
  //
  // 'force' overrides the null-Import shortcut: a multivector is
  // produced even though no Import object exists.
8208  if (! importer.is_null () || force) {
8209  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
8210  X_colMap = rcp (new MV (colMap, numVecs));
8211 
8212  // Cache the newly created multivector for later reuse.
8213  importMV_ = X_colMap;
8214  }
8215  else { // Yay, we can reuse the cached multivector!
8216  X_colMap = importMV_;
8217  // mfh 09 Jan 2013: We don't have to fill with zeros first,
8218  // because the Import uses INSERT combine mode, which overwrites
8219  // existing entries.
8220  //
8221  //X_colMap->putScalar (ZERO);
8222  }
8223  }
8224  return X_colMap;
8225  }
8226 
// Return Export workspace: a multivector distributed over this matrix's
// row Map, or null if no such workspace is needed. (The error message
// below names this method getRowMapMultiVector.)
//
// Throws std::runtime_error if the matrix's graph is not fill complete.
//
// If the graph's Export object is null and 'force' is false, the result
// is null. Otherwise the result has as many vectors as Y_rangeMap: the
// cached exportMV_ is reused when it is nonnull and its vector count
// matches; if not, a new MV over the row Map is allocated and stored in
// exportMV_ for later calls. A reused multivector is returned as-is.
//
// Y_rangeMap: only its number of vectors is read here.
// force: if true, return a multivector even when the Export is null.
//
// NOTE(review): exportMV_ is assigned inside a const method, so it is
// presumably declared mutable -- confirm against the class declaration.
8227  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8228  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
8231  const bool force) const
8232  {
8233  using Teuchos::null;
8234  using Teuchos::RCP;
8235  using Teuchos::rcp;
8236 
8237  // If the graph is not fill complete, then the Export object (if
8238  // one should exist) hasn't been constructed yet.
8239  TEUCHOS_TEST_FOR_EXCEPTION(
8240  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
8241  "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
8242  "matrix's graph is fill complete.");
8243 
  // The workspace must have the same number of vectors as the output.
8244  const size_t numVecs = Y_rangeMap.getNumVectors ();
8245  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
8246  // Every version of the constructor takes either a row Map, or a
8247  // graph (all of whose constructors take a row Map). Thus, the
8248  // matrix always has a row Map.
8249  RCP<const map_type> rowMap = this->getRowMap ();
8250 
8251  RCP<MV> Y_rowMap; // null by default
8252 
8253  // If the Export object is trivial (null), then we don't need a
8254  // separate row Map multivector. Just return null in that case.
8255  // The caller is responsible for knowing not to use the returned
8256  // null pointer.
8257  //
8258  // If the Export is nontrivial, then we do need a separate row
8259  // Map multivector for the Export operation. Check in that case
8260  // if we have to (re)create the row Map multivector.
  //
  // 'force' overrides the null-Export shortcut: a multivector is
  // produced even though no Export object exists.
8261  if (! exporter.is_null () || force) {
8262  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
8263  Y_rowMap = rcp (new MV (rowMap, numVecs));
8264  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
8265  }
8266  else { // Yay, we can reuse the cached multivector!
8267  Y_rowMap = exportMV_;
8268  }
8269  }
8270  return Y_rowMap;
8271  }
8272 
// Forward removeEmptyProcessesInPlace(newMap) to the matrix's own
// (nonconst) graph, then resynchronize the matrix's copies of state
// that derive from that graph.
//
// Throws std::logic_error if myGraph_ is null, i.e. (per the message
// below) when the matrix was created with a constant graph that it is
// not allowed to modify.
//
// newMap: passed through unchanged to the graph's
//   removeEmptyProcessesInPlace; this method does not inspect it.
//
// After the graph call, this->map_ is reset to getRowMap() and
// staticGraph_ is reset to a const view of myGraph_.
8273  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8274  void
8276  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
8277  {
8278  TEUCHOS_TEST_FOR_EXCEPTION(
8279  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
8280  "removeEmptyProcessesInPlace: This method does not work when the matrix "
8281  "was created with a constant graph (that is, when it was created using "
8282  "the version of its constructor that takes an RCP<const CrsGraph>). "
8283  "This is because the matrix is not allowed to modify the graph in that "
8284  "case, but removing empty processes requires modifying the graph.");
8285  myGraph_->removeEmptyProcessesInPlace (newMap);
8286  // Even though CrsMatrix's row Map (as returned by getRowMap())
8287  // comes from its CrsGraph, CrsMatrix still implements DistObject,
8288  // so we also have to change the DistObject's Map.
8289  this->map_ = this->getRowMap ();
8290  // In the nonconst graph case, staticGraph_ is just a const
8291  // pointer to myGraph_. This assignment is probably redundant,
8292  // but it doesn't hurt.
8293  staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
8294  }
8295 
// Compute C = alpha*A + beta*B, where B is this matrix, and return C
// as an RCP to a RowMatrix.
//
// alpha, beta: scalar coefficients. When a coefficient compares equal
//   to zero, the corresponding matrix's rows are neither counted for
//   C's allocation nor inserted into C.
// A: the other operand. The body uses its getDomainMap, getRangeMap,
//   getRowMap, getNumEntriesInLocalRow and getGlobalRowCopy methods.
// domainMap, rangeMap: Maps passed to C's fillComplete. If one is null,
//   B's corresponding Map is used when nonnull, otherwise A's.
// params: optional. If nonnull, the bool "Call fillComplete" (default
//   true) decides whether fillComplete is called on C, and the sublists
//   "Constructor parameters" and "fillComplete parameters" are handed
//   to C's constructor and to C's fillComplete, respectively.
//
// Throws std::invalid_argument if a domain or range Map is needed but
// none of the input, B, or A supplies one, or if the row Maps of A and
// B are not the same (isSameAs). When Details::Behavior::debug
// ("CrsMatrix") is true, also throws std::invalid_argument if the Maps
// of A, B, and the inputs are inconsistent. Throws std::logic_error if
// C is unexpectedly null after construction.
//
// C's row Map is B's row Map. Per-row capacity of C is the sum of the
// contributing rows' entry counts, so duplicate column indices from A
// and B are left for C to merge (see the comment after C's
// construction below).
8296  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8297  Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
8299  add (const Scalar& alpha,
8301  const Scalar& beta,
8302  const Teuchos::RCP<const map_type>& domainMap,
8303  const Teuchos::RCP<const map_type>& rangeMap,
8304  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8305  {
8306  using Teuchos::Array;
8307  using Teuchos::ArrayView;
8308  using Teuchos::ParameterList;
8309  using Teuchos::RCP;
8310  using Teuchos::rcp;
8311  using Teuchos::rcp_implicit_cast;
8312  using Teuchos::sublist;
8313  using std::endl;
8314  using LO = local_ordinal_type;
8315  using GO = global_ordinal_type;
8316  using row_matrix_type =
8318  using crs_matrix_type =
8320  const char errPfx[] = "Tpetra::CrsMatrix::add: ";
8321 
8322  const bool debug = Details::Behavior::debug("CrsMatrix");
8323  const bool verbose = Details::Behavior::verbose("CrsMatrix");
  // The prefix string is only created in verbose mode; every later
  // dereference of 'prefix' is guarded by 'verbose'.
8324  std::unique_ptr<std::string> prefix;
8325  if (verbose) {
8326  prefix = this->createPrefix("CrsMatrix", "add");
8327  std::ostringstream os;
8328  os << *prefix << "Start" << endl;
8329  std::cerr << os.str ();
8330  }
8331 
8332  const crs_matrix_type& B = *this; // a convenient abbreviation
8333  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
8334  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
8335 
8336  // If the user didn't supply a domain or range Map, then try to
8337  // get one from B first (if it has them), then from A (if it has
8338  // them). If we don't have any domain or range Maps, scold the
8339  // user.
8340  RCP<const map_type> A_domainMap = A.getDomainMap ();
8341  RCP<const map_type> A_rangeMap = A.getRangeMap ();
8342  RCP<const map_type> B_domainMap = B.getDomainMap ();
8343  RCP<const map_type> B_rangeMap = B.getRangeMap ();
8344 
8345  RCP<const map_type> theDomainMap = domainMap;
8346  RCP<const map_type> theRangeMap = rangeMap;
8347 
8348  if (domainMap.is_null ()) {
8349  if (B_domainMap.is_null ()) {
8350  TEUCHOS_TEST_FOR_EXCEPTION(
8351  A_domainMap.is_null (), std::invalid_argument,
8352  "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
8353  "then you must supply a nonnull domain Map to this method.");
8354  theDomainMap = A_domainMap;
8355  } else {
8356  theDomainMap = B_domainMap;
8357  }
8358  }
8359  if (rangeMap.is_null ()) {
8360  if (B_rangeMap.is_null ()) {
8361  TEUCHOS_TEST_FOR_EXCEPTION(
8362  A_rangeMap.is_null (), std::invalid_argument,
8363  "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
8364  "then you must supply a nonnull range Map to this method.");
8365  theRangeMap = A_rangeMap;
8366  } else {
8367  theRangeMap = B_rangeMap;
8368  }
8369  }
8370 
8371  if (debug) {
8372  // In debug mode, check that A and B have matching domain and
8373  // range Maps, if they have domain and range Maps at all. (If
8374  // they aren't fill complete, then they may not yet have them.)
  //
  // Three cases follow: A has both Maps (B and the inputs are then
  // checked only if B also has both), only B has both Maps (inputs
  // are checked against B), or neither has both (inputs must be
  // nonnull).
8375  if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
8376  if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
8377  TEUCHOS_TEST_FOR_EXCEPTION
8378  (! B_domainMap->isSameAs(*A_domainMap),
8379  std::invalid_argument,
8380  errPfx << "The input RowMatrix A must have a domain Map "
8381  "which is the same as (isSameAs) this RowMatrix's "
8382  "domain Map.");
8383  TEUCHOS_TEST_FOR_EXCEPTION
8384  (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
8385  errPfx << "The input RowMatrix A must have a range Map "
8386  "which is the same as (isSameAs) this RowMatrix's range "
8387  "Map.");
8388  TEUCHOS_TEST_FOR_EXCEPTION
8389  (! domainMap.is_null() &&
8390  ! domainMap->isSameAs(*B_domainMap),
8391  std::invalid_argument,
8392  errPfx << "The input domain Map must be the same as "
8393  "(isSameAs) this RowMatrix's domain Map.");
8394  TEUCHOS_TEST_FOR_EXCEPTION
8395  (! rangeMap.is_null() &&
8396  ! rangeMap->isSameAs(*B_rangeMap),
8397  std::invalid_argument,
8398  errPfx << "The input range Map must be the same as "
8399  "(isSameAs) this RowMatrix's range Map.");
8400  }
8401  }
8402  else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
8403  TEUCHOS_TEST_FOR_EXCEPTION
8404  (! domainMap.is_null() &&
8405  ! domainMap->isSameAs(*B_domainMap),
8406  std::invalid_argument,
8407  errPfx << "The input domain Map must be the same as "
8408  "(isSameAs) this RowMatrix's domain Map.");
8409  TEUCHOS_TEST_FOR_EXCEPTION
8410  (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
8411  std::invalid_argument,
8412  errPfx << "The input range Map must be the same as "
8413  "(isSameAs) this RowMatrix's range Map.");
8414  }
8415  else {
8416  TEUCHOS_TEST_FOR_EXCEPTION
8417  (domainMap.is_null() || rangeMap.is_null(),
8418  std::invalid_argument, errPfx << "If neither A nor B "
8419  "have a domain and range Map, then you must supply a "
8420  "nonnull domain and range Map to this method.");
8421  }
8422  }
8423 
8424  // What parameters do we pass to C's constructor? Do we call
8425  // fillComplete on C after filling it? And if so, what parameters
8426  // do we pass to C's fillComplete call?
8427  bool callFillComplete = true;
8428  RCP<ParameterList> constructorSublist;
8429  RCP<ParameterList> fillCompleteSublist;
8430  if (! params.is_null()) {
8431  callFillComplete =
8432  params->get("Call fillComplete", callFillComplete);
8433  constructorSublist = sublist(params, "Constructor parameters");
8434  fillCompleteSublist = sublist(params, "fillComplete parameters");
8435  }
8436 
8437  RCP<const map_type> A_rowMap = A.getRowMap ();
8438  RCP<const map_type> B_rowMap = B.getRowMap ();
8439  RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
8440  RCP<crs_matrix_type> C; // The result matrix.
8441 
8442  // If A and B's row Maps are the same, we can compute an upper
8443  // bound on the number of entries in each row of C, before
8444  // actually computing the sum. A reasonable upper bound is the
8445  // sum of the two entry counts in each row. If we choose this as
8446  // the actual per-row upper bound, we can use static profile.
8447  if (A_rowMap->isSameAs (*B_rowMap)) {
8448  const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8449  Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
8450 
8451  // Get the number of entries in each row of A.
8452  if (alpha != ZERO) {
8453  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8454  const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8455  C_maxNumEntriesPerRow[localRow] += A_numEntries;
8456  }
8457  }
8458  // Get the number of entries in each row of B.
8459  if (beta != ZERO) {
8460  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
8461  const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8462  C_maxNumEntriesPerRow[localRow] += B_numEntries;
8463  }
8464  }
8465  // Construct the result matrix C.
8466  if (constructorSublist.is_null ()) {
8467  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
8468  StaticProfile));
8469  } else {
8470  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
8471  StaticProfile, constructorSublist));
8472  }
8473  // Since A and B have the same row Maps, we could add them
8474  // together all at once and merge values before we call
8475  // insertGlobalValues. However, we don't really need to, since
8476  // we've already allocated enough space in each row of C for C
8477  // to do the merge itself.
8478  }
8479  else { // the row Maps of A and B are not the same
8480  // Construct the result matrix C.
8481  // true: !A_rowMap->isSameAs (*B_rowMap)
  // This branch always throws, so C is never constructed here.
8482  TEUCHOS_TEST_FOR_EXCEPTION
8483  (true, std::invalid_argument, errPfx << "The row maps must "
8484  "be the same for statically allocated matrices, to ensure "
8485  "that there is sufficient space to do the addition.");
8486  }
8487 
8488  TEUCHOS_TEST_FOR_EXCEPTION
8489  (C.is_null (), std::logic_error,
8490  errPfx << "C should not be null at this point. "
8491  "Please report this bug to the Tpetra developers.");
8492 
8493  if (verbose) {
8494  std::ostringstream os;
8495  os << *prefix << "Compute C = alpha*A + beta*B" << endl;
8496  std::cerr << os.str ();
8497  }
8498 
  // Scratch buffers for one row's column indices and values. They
  // only ever grow, and are shared by the A pass and the B pass.
8499  Array<GO> ind;
8500  Array<Scalar> val;
8501 
  // A pass: copy each local row of A by global index, scale it by
  // alpha (skipped when alpha is one), and insert it into C.
8502  if (alpha != ZERO) {
8503  const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
8504  for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
8505  size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
8506  const GO globalRow = A_rowMap->getGlobalElement (localRow);
8507  if (A_numEntries > static_cast<size_t> (ind.size ())) {
8508  ind.resize (A_numEntries);
8509  val.resize (A_numEntries);
8510  }
8511  ArrayView<GO> indView = ind (0, A_numEntries);
8512  ArrayView<Scalar> valView = val (0, A_numEntries);
8513  A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
8514 
8515  if (alpha != ONE) {
8516  for (size_t k = 0; k < A_numEntries; ++k) {
8517  valView[k] *= alpha;
8518  }
8519  }
8520  C->insertGlobalValues (globalRow, indView, valView);
8521  }
8522  }
8523 
  // B pass: same procedure for this matrix, scaled by beta.
8524  if (beta != ZERO) {
8525  const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
8526  for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
8527  size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
8528  const GO globalRow = B_rowMap->getGlobalElement (localRow);
8529  if (B_numEntries > static_cast<size_t> (ind.size ())) {
8530  ind.resize (B_numEntries);
8531  val.resize (B_numEntries);
8532  }
8533  ArrayView<GO> indView = ind (0, B_numEntries);
8534  ArrayView<Scalar> valView = val (0, B_numEntries);
8535  B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
8536 
8537  if (beta != ONE) {
8538  for (size_t k = 0; k < B_numEntries; ++k) {
8539  valView[k] *= beta;
8540  }
8541  }
8542  C->insertGlobalValues (globalRow, indView, valView);
8543  }
8544  }
8545 
  // If the caller opted out of fillComplete, C is returned without
  // fillComplete having been called on it.
8546  if (callFillComplete) {
8547  if (verbose) {
8548  std::ostringstream os;
8549  os << *prefix << "Call fillComplete on C" << endl;
8550  std::cerr << os.str ();
8551  }
8552  if (fillCompleteSublist.is_null ()) {
8553  C->fillComplete (theDomainMap, theRangeMap);
8554  } else {
8555  C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
8556  }
8557  }
8558  else if (verbose) {
8559  std::ostringstream os;
8560  os << *prefix << "Do NOT call fillComplete on C" << endl;
8561  std::cerr << os.str ();
8562  }
8563 
8564  if (verbose) {
8565  std::ostringstream os;
8566  os << *prefix << "Done" << endl;
8567  std::cerr << os.str ();
8568  }
8569  return rcp_implicit_cast<row_matrix_type> (C);
8570  }
8571 
8572 
8573 
8574  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
8575  void
8578  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
8579  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
8580  const Teuchos::RCP<const map_type>& domainMap,
8581  const Teuchos::RCP<const map_type>& rangeMap,
8582  const Teuchos::RCP<Teuchos::ParameterList>& params) const
8583  {
8584  using Details::Behavior;
8589  using Teuchos::ArrayRCP;
8590  using Teuchos::ArrayView;
8591  using Teuchos::Comm;
8592  using Teuchos::ParameterList;
8593  using Teuchos::RCP;
8594  using std::endl;
8595  typedef LocalOrdinal LO;
8596  typedef GlobalOrdinal GO;
8597  typedef node_type NT;
8598  typedef CrsMatrix<Scalar, LO, GO, NT> this_type;
8599  typedef Vector<int, LO, GO, NT> IntVectorType;
8600  using Teuchos::as;
8601 
8602  const bool debug = Behavior::debug("CrsMatrix");
8603  const bool verbose = Behavior::verbose("CrsMatrix");
8604  int MyPID = getComm ()->getRank ();
8605 
8606  std::unique_ptr<std::string> verbosePrefix;
8607  if (verbose) {
8608  verbosePrefix =
8609  this->createPrefix("CrsMatrix", "transferAndFillComplete");
8610  std::ostringstream os;
8611  os << "Start" << endl;
8612  std::cerr << os.str();
8613  }
8614 
8615  //
8616  // Get the caller's parameters
8617  //
8618  bool isMM = false; // optimize for matrix-matrix ops.
8619  bool reverseMode = false; // Are we in reverse mode?
8620  bool restrictComm = false; // Do we need to restrict the communicator?
8621 
8622  int mm_optimization_core_count =
8623  Behavior::TAFC_OptimizationCoreCount();
8624  RCP<ParameterList> matrixparams; // parameters for the destination matrix
8625  bool overrideAllreduce = false;
8626  if (! params.is_null ()) {
8627  matrixparams = sublist (params, "CrsMatrix");
8628  reverseMode = params->get ("Reverse Mode", reverseMode);
8629  restrictComm = params->get ("Restrict Communicator", restrictComm);
8630  auto & slist = params->sublist("matrixmatrix: kernel params",false);
8631  isMM = slist.get("isMatrixMatrix_TransferAndFillComplete",false);
8632  mm_optimization_core_count = slist.get("MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
8633 
8634  overrideAllreduce = slist.get("MM_TAFC_OverrideAllreduceCheck",false);
8635  if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM = false;
8636  if(reverseMode) isMM = false;
8637  }
8638 
8639  // Only used in the sparse matrix-matrix multiply (isMM) case.
8640  std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
8641  int mismatch = 0;
8642  int reduced_mismatch = 0;
8643  if (isMM && !overrideAllreduce) {
8644 
8645  // Test for pathological matrix transfer
8646  const bool source_vals = ! getGraph ()->getImporter ().is_null();
8647  const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
8648  rowTransfer.getRemoteLIDs ().size() == 0);
8649  mismatch = (source_vals != target_vals) ? 1 : 0;
8650  iallreduceRequest =
8651  ::Tpetra::Details::iallreduce (mismatch, reduced_mismatch,
8652  Teuchos::REDUCE_MAX, * (getComm ()));
8653  }
8654 
8655 #ifdef HAVE_TPETRA_MMM_TIMINGS
8656  using Teuchos::TimeMonitor;
8657  std::string label;
8658  if(!params.is_null())
8659  label = params->get("Timer Label",label);
8660  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
8661  std::string tlstr;
8662  {
8663  std::ostringstream os;
8664  if(isMM) os<<":MMOpt";
8665  else os<<":MMLegacy";
8666  tlstr = os.str();
8667  }
8668 
8669  Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string("TAFC All") +tlstr ));
8670 #endif
8671 
8672  // Make sure that the input argument rowTransfer is either an
8673  // Import or an Export. Import and Export are the only two
8674  // subclasses of Transfer that we defined, but users might
8675  // (unwisely, for now at least) decide to implement their own
8676  // subclasses. Exclude this possibility.
8677  const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
8678  const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
8679  TEUCHOS_TEST_FOR_EXCEPTION(
8680  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
8681  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
8682  "argument must be either an Import or an Export, and its template "
8683  "parameters must match the corresponding template parameters of the "
8684  "CrsMatrix.");
8685 
8686  // Make sure that the input argument domainTransfer is either an
8687  // Import or an Export. Import and Export are the only two
8688  // subclasses of Transfer that we defined, but users might
8689  // (unwisely, for now at least) decide to implement their own
8690  // subclasses. Exclude this possibility.
8691  Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
8692  Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
8693 
8694  if(! domainTransfer.is_null()) {
8695  TEUCHOS_TEST_FOR_EXCEPTION(
8696  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
8697  "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
8698  "argument must be either an Import or an Export, and its template "
8699  "parameters must match the corresponding template parameters of the "
8700  "CrsMatrix.");
8701 
8702  TEUCHOS_TEST_FOR_EXCEPTION(
8703  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
8704  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
8705  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
8706  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8707  "arguments must be of the same type (either Import or Export).");
8708 
8709  TEUCHOS_TEST_FOR_EXCEPTION(
8710  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
8711  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
8712  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
8713  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
8714  "arguments must be of the same type (either Import or Export).");
8715  } // domainTransfer != null
8716 
8717 
8718  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
8719  // if the source Map is not distributed but the target Map is?
8720  const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
8721 
8722  // Get the new domain and range Maps. We need some of them for
8723  // error checking, now that we have the reverseMode parameter.
8724  RCP<const map_type> MyRowMap = reverseMode ?
8725  rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
8726  RCP<const map_type> MyColMap; // create this below
8727  RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
8728  domainMap : getDomainMap ();
8729  RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
8730  rangeMap : getRangeMap ();
8731  RCP<const map_type> BaseRowMap = MyRowMap;
8732  RCP<const map_type> BaseDomainMap = MyDomainMap;
8733 
8734  // If the user gave us a nonnull destMat, then check whether it's
8735  // "pristine." That means that it has no entries.
8736  //
8737  // FIXME (mfh 15 May 2014) If this is not true on all processes,
8738  // then this exception test may hang. It would be better to
8739  // forward an error flag to the next communication phase.
8740  if (! destMat.is_null ()) {
8741  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
8742  // whether a graph or matrix has no entries on the calling
8743  // process, is that it is neither locally nor globally indexed.
8744  // This may change eventually with the Kokkos refactor version
8745  // of Tpetra, so it would be better just to check the quantity
8746  // of interest directly. Note that with the Kokkos refactor
8747  // version of Tpetra, asking for the total number of entries in
8748  // a graph or matrix that is not fill complete might require
8749  // computation (kernel launch), since it is not thread scalable
8750  // to update a count every time an entry is inserted.
8751  const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
8752  ! destMat->getGraph ()->isGloballyIndexed ();
8753  TEUCHOS_TEST_FOR_EXCEPTION(
8754  ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
8755  "transferAndFillComplete: The input argument 'destMat' is only allowed "
8756  "to be nonnull, if its graph is empty (neither locally nor globally "
8757  "indexed).");
8758  // FIXME (mfh 15 May 2014) At some point, we want to change
8759  // graphs and matrices so that their DistObject Map
8760  // (this->getMap()) may differ from their row Map. This will
8761  // make redistribution for 2-D distributions more efficient. I
8762  // hesitate to change this check, because I'm not sure how much
8763  // the code here depends on getMap() and getRowMap() being the
8764  // same.
8765  TEUCHOS_TEST_FOR_EXCEPTION(
8766  ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
8767  "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
8768  "input argument 'destMat' is not the same as the (row) Map specified "
8769  "by the input argument 'rowTransfer'.");
8770  TEUCHOS_TEST_FOR_EXCEPTION(
8771  ! destMat->checkSizes (*this), std::invalid_argument,
8772  "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
8773  "destination matrix, but checkSizes() indicates that it is not a legal "
8774  "legal target for redistribution from the source matrix (*this). This "
8775  "may mean that they do not have the same dimensions.");
8776  }
8777 
8778  // If forward mode (the default), then *this's (row) Map must be
8779  // the same as the source Map of the Transfer. If reverse mode,
8780  // then *this's (row) Map must be the same as the target Map of
8781  // the Transfer.
8782  //
8783  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
8784  // and matrices so that their DistObject Map (this->getMap()) may
8785  // differ from their row Map. This will make redistribution for
8786  // 2-D distributions more efficient. I hesitate to change this
8787  // check, because I'm not sure how much the code here depends on
8788  // getMap() and getRowMap() being the same.
8789  TEUCHOS_TEST_FOR_EXCEPTION(
8790  ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
8791  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8792  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
8793  TEUCHOS_TEST_FOR_EXCEPTION(
8794  ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
8795  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
8796  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
8797 
8798  // checks for domainTransfer
8799  TEUCHOS_TEST_FOR_EXCEPTION(
8800  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
8801  std::invalid_argument,
8802  "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
8803  "argument must be the same as the rebalanced domain map 'domainMap'");
8804 
8805  TEUCHOS_TEST_FOR_EXCEPTION(
8806  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
8807  std::invalid_argument,
8808  "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
8809  "argument must be the same as the rebalanced domain map 'domainMap'");
8810 
8811  // The basic algorithm here is:
8812  //
8813  // 1. Call the moral equivalent of "distor.do" to handle the import.
8814  // 2. Copy all the Imported and Copy/Permuted data into the raw
8815  // CrsMatrix / CrsGraphData pointers, still using GIDs.
8816  // 3. Call an optimized version of MakeColMap that avoids the
8817  // Directory lookups (since the importer knows who owns all the
8818  // GIDs) AND reindexes to LIDs.
8819  // 4. Call expertStaticFillComplete()
8820 
8821  // Get information from the Importer
8822  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
8823  ArrayView<const LO> ExportLIDs = reverseMode ?
8824  rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
8825  ArrayView<const LO> RemoteLIDs = reverseMode ?
8826  rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
8827  ArrayView<const LO> PermuteToLIDs = reverseMode ?
8828  rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
8829  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
8830  rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
8831  Distributor& Distor = rowTransfer.getDistributor ();
8832 
8833  // Owning PIDs
8834  Teuchos::Array<int> SourcePids;
8835  Teuchos::Array<int> TargetPids;
8836 
8837  // Temp variables for sub-communicators
8838  RCP<const map_type> ReducedRowMap, ReducedColMap,
8839  ReducedDomainMap, ReducedRangeMap;
8840  RCP<const Comm<int> > ReducedComm;
8841 
8842  // If the user gave us a null destMat, then construct the new
8843  // destination matrix. We will replace its column Map later.
8844  if (destMat.is_null ()) {
8845  destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
8846  }
8847 
8848  /***************************************************/
8849  /***** 1) First communicator restriction phase ****/
8850  /***************************************************/
8851  if (restrictComm) {
8852  ReducedRowMap = MyRowMap->removeEmptyProcesses ();
8853  ReducedComm = ReducedRowMap.is_null () ?
8854  Teuchos::null :
8855  ReducedRowMap->getComm ();
8856  destMat->removeEmptyProcessesInPlace (ReducedRowMap);
8857 
8858  ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
8859  ReducedRowMap :
8860  MyDomainMap->replaceCommWithSubset (ReducedComm);
8861  ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
8862  ReducedRowMap :
8863  MyRangeMap->replaceCommWithSubset (ReducedComm);
8864 
8865  // Reset the "my" maps
8866  MyRowMap = ReducedRowMap;
8867  MyDomainMap = ReducedDomainMap;
8868  MyRangeMap = ReducedRangeMap;
8869 
8870  // Update my PID, if we've restricted the communicator
8871  if (! ReducedComm.is_null ()) {
8872  MyPID = ReducedComm->getRank ();
8873  }
8874  else {
8875  MyPID = -2; // For debugging
8876  }
8877  }
8878  else {
8879  ReducedComm = MyRowMap->getComm ();
8880  }
8881 
8882 
8883 
8884  /***************************************************/
8885  /***** 2) From Tpetra::DistObject::doTransfer() ****/
8886  /***************************************************/
8887  // Get the owning PIDs
8888  RCP<const import_type> MyImporter = getGraph ()->getImporter ();
8889 
8890  // Check whether the domain Map of the source matrix and the base domain Map are the same.
8891  bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
8892 
8893  if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
8894  // Same domain map as source matrix
8895  //
8896  // NOTE: This won't work for restrictComm (because the Import
8897  // doesn't know the restricted PIDs), though writing an
8898  // optimized version for that case would be easy (Import an
8899  // IntVector of the new PIDs). Might want to add this later.
8900  Import_Util::getPids (*MyImporter, SourcePids, false);
8901  }
8902  else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
8903  // Same domain map as source matrix (restricted communicator)
8904  // We need one import from the domain to the column map
8905  IntVectorType SourceDomain_pids(getDomainMap (),true);
8906  IntVectorType SourceCol_pids(getColMap());
8907  // SourceDomain_pids contains the restricted pids
8908  SourceDomain_pids.putScalar(MyPID);
8909 
8910  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8911  SourcePids.resize (getColMap ()->getNodeNumElements ());
8912  SourceCol_pids.get1dCopy (SourcePids ());
8913  }
8914  else if (MyImporter.is_null () && bSameDomainMap) {
8915  // Matrix has no off-process entries
8916  SourcePids.resize (getColMap ()->getNodeNumElements ());
8917  SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
8918  }
8919  else if ( ! MyImporter.is_null () &&
8920  ! domainTransfer.is_null () ) {
8921  // general implementation for rectangular matrices with
8922  // domain map different than SourceMatrix domain map.
8923  // User has to provide a DomainTransfer object. We need
8924  // two communications (import/export).
8925 
8926  // TargetDomain_pids lives on the rebalanced new domain map
8927  IntVectorType TargetDomain_pids (domainMap);
8928  TargetDomain_pids.putScalar (MyPID);
8929 
8930  // SourceDomain_pids lives on the non-rebalanced old domain map
8931  IntVectorType SourceDomain_pids (getDomainMap ());
8932 
8933  // SourceCol_pids lives on the non-rebalanced old column map
8934  IntVectorType SourceCol_pids (getColMap ());
8935 
8936  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8937  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8938  }
8939  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8940  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8941  }
8942  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8943  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8944  }
8945  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8946  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8947  }
8948  else {
8949  TEUCHOS_TEST_FOR_EXCEPTION(
8950  true, std::logic_error, "Tpetra::CrsMatrix::"
8951  "transferAndFillComplete: Should never get here! "
8952  "Please report this bug to a Tpetra developer.");
8953  }
8954  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8955  SourcePids.resize (getColMap ()->getNodeNumElements ());
8956  SourceCol_pids.get1dCopy (SourcePids ());
8957  }
8958  else if ( ! MyImporter.is_null () &&
8959  BaseDomainMap->isSameAs (*BaseRowMap) &&
8960  getDomainMap ()->isSameAs (*getRowMap ())) {
8961  // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
8962 
8963  IntVectorType TargetRow_pids (domainMap);
8964  IntVectorType SourceRow_pids (getRowMap ());
8965  IntVectorType SourceCol_pids (getColMap ());
8966 
8967  TargetRow_pids.putScalar (MyPID);
8968  if (! reverseMode && xferAsImport != nullptr) {
8969  SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
8970  }
8971  else if (reverseMode && xferAsExport != nullptr) {
8972  SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
8973  }
8974  else if (! reverseMode && xferAsExport != nullptr) {
8975  SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
8976  }
8977  else if (reverseMode && xferAsImport != nullptr) {
8978  SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
8979  }
8980  else {
8981  TEUCHOS_TEST_FOR_EXCEPTION(
8982  true, std::logic_error, "Tpetra::CrsMatrix::"
8983  "transferAndFillComplete: Should never get here! "
8984  "Please report this bug to a Tpetra developer.");
8985  }
8986 
8987  SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
8988  SourcePids.resize (getColMap ()->getNodeNumElements ());
8989  SourceCol_pids.get1dCopy (SourcePids ());
8990  }
8991  else {
8992  TEUCHOS_TEST_FOR_EXCEPTION(
8993  true, std::invalid_argument, "Tpetra::CrsMatrix::"
8994  "transferAndFillComplete: This method only allows either domainMap == "
8995  "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8996  "getDomainMap () == getRowMap ()).");
8997  }
8998 
8999  // Tpetra-specific stuff
9000  size_t constantNumPackets = destMat->constantNumberOfPackets ();
9001  if (constantNumPackets == 0) {
9002  destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
9003  RemoteLIDs.size ());
9004  }
9005  else {
9006  // There are a constant number of packets per element. We
9007  // already know (from the number of "remote" (incoming)
9008  // elements) how many incoming elements we expect, so we can
9009  // resize the buffer accordingly.
9010  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
9011  destMat->reallocImportsIfNeeded (rbufLen, false, nullptr);
9012  }
9013 
9014  // Pack & Prepare w/ owning PIDs
9015  if (debug) {
9016  using Teuchos::outArg;
9017  using Teuchos::REDUCE_MAX;
9018  using Teuchos::reduceAll;
9019  using std::cerr;
9020  using std::endl;
9021  RCP<const Teuchos::Comm<int> > comm = this->getComm ();
9022  const int myRank = comm->getRank ();
9023 
9024  std::ostringstream errStrm;
9025  int lclErr = 0;
9026  int gblErr = 0;
9027 
9028  Teuchos::ArrayView<size_t> numExportPacketsPerLID;
9029  try {
9030  // packAndPrepare* methods modify numExportPacketsPerLID_.
9031  destMat->numExportPacketsPerLID_.modify_host ();
9032  numExportPacketsPerLID =
9033  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
9034  }
9035  catch (std::exception& e) {
9036  errStrm << "Proc " << myRank << ": getArrayViewFromDualView threw: "
9037  << e.what () << std::endl;
9038  lclErr = 1;
9039  }
9040  catch (...) {
9041  errStrm << "Proc " << myRank << ": getArrayViewFromDualView threw "
9042  "an exception not a subclass of std::exception" << std::endl;
9043  lclErr = 1;
9044  }
9045 
9046  if (! comm.is_null ()) {
9047  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
9048  }
9049  if (gblErr != 0) {
9050  ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
9051  TEUCHOS_TEST_FOR_EXCEPTION(
9052  true, std::runtime_error, "getArrayViewFromDualView threw an "
9053  "exception on at least one process.");
9054  }
9055 
9056  if (verbose) {
9057  std::ostringstream os;
9058  os << *verbosePrefix << "Calling packCrsMatrixWithOwningPIDs"
9059  << std::endl;
9060  std::cerr << os.str ();
9061  }
9062  try {
9064  destMat->exports_,
9065  numExportPacketsPerLID,
9066  ExportLIDs,
9067  SourcePids,
9068  constantNumPackets,
9069  Distor);
9070  }
9071  catch (std::exception& e) {
9072  errStrm << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs threw: "
9073  << e.what () << std::endl;
9074  lclErr = 1;
9075  }
9076  catch (...) {
9077  errStrm << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs threw "
9078  "an exception not a subclass of std::exception" << std::endl;
9079  lclErr = 1;
9080  }
9081 
9082  if (verbose) {
9083  std::ostringstream os;
9084  os << *verbosePrefix << "Done with packCrsMatrixWithOwningPIDs"
9085  << std::endl;
9086  std::cerr << os.str ();
9087  }
9088 
9089  if (! comm.is_null ()) {
9090  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
9091  }
9092  if (gblErr != 0) {
9093  ::Tpetra::Details::gathervPrint (cerr, errStrm.str (), *comm);
9094  TEUCHOS_TEST_FOR_EXCEPTION(
9095  true, std::runtime_error, "packCrsMatrixWithOwningPIDs threw an "
9096  "exception on at least one process.");
9097  }
9098  }
9099  else {
9100  // packAndPrepare* methods modify numExportPacketsPerLID_.
9101  destMat->numExportPacketsPerLID_.modify_host ();
9102  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
9103  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
9104  if (verbose) {
9105  std::ostringstream os;
9106  os << *verbosePrefix << "Calling packCrsMatrixWithOwningPIDs"
9107  << std::endl;
9108  std::cerr << os.str ();
9109  }
9111  destMat->exports_,
9112  numExportPacketsPerLID,
9113  ExportLIDs,
9114  SourcePids,
9115  constantNumPackets,
9116  Distor);
9117  if (verbose) {
9118  std::ostringstream os;
9119  os << *verbosePrefix << "Done with packCrsMatrixWithOwningPIDs"
9120  << std::endl;
9121  std::cerr << os.str ();
9122  }
9123  }
9124 
9125  // Do the exchange of remote data.
9126  if (! communication_needed) {
9127  if (verbose) {
9128  std::ostringstream os;
9129  os << *verbosePrefix << "Communication not needed" << std::endl;
9130  std::cerr << os.str ();
9131  }
9132  }
9133  else {
9134  if (reverseMode) {
9135  if (constantNumPackets == 0) { // variable number of packets per LID
9136  if (verbose) {
9137  std::ostringstream os;
9138  os << *verbosePrefix << "Reverse mode, variable # packets / LID"
9139  << std::endl;
9140  std::cerr << os.str ();
9141  }
9142  // Make sure that host has the latest version, since we're
9143  // using the version on host. If host has the latest
9144  // version, syncing to host does nothing.
9145  destMat->numExportPacketsPerLID_.sync_host ();
9146  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
9147  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
9148  destMat->numImportPacketsPerLID_.sync_host ();
9149  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
9150  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
9151 
9152  if (verbose) {
9153  std::ostringstream os;
9154  os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits"
9155  << std::endl;
9156  std::cerr << os.str ();
9157  }
9158  Distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
9159  numImportPacketsPerLID);
9160  if (verbose) {
9161  std::ostringstream os;
9162  os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits"
9163  << std::endl;
9164  std::cerr << os.str ();
9165  }
9166 
9167  size_t totalImportPackets = 0;
9168  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
9169  totalImportPackets += numImportPacketsPerLID[i];
9170  }
9171 
9172  // Reallocation MUST go before setting the modified flag,
9173  // because it may clear out the flags.
9174  destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
9175  verbosePrefix.get ());
9176  destMat->imports_.modify_host ();
9177  Teuchos::ArrayView<char> hostImports =
9178  getArrayViewFromDualView (destMat->imports_);
9179  // This is a legacy host pack/unpack path, so use the host
9180  // version of exports_.
9181  destMat->exports_.sync_host ();
9182  Teuchos::ArrayView<const char> hostExports =
9183  getArrayViewFromDualView (destMat->exports_);
9184  if (verbose) {
9185  std::ostringstream os;
9186  os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaits"
9187  << std::endl;
9188  std::cerr << os.str ();
9189  }
9190  Distor.doReversePostsAndWaits (hostExports,
9191  numExportPacketsPerLID,
9192  hostImports,
9193  numImportPacketsPerLID);
9194  if (verbose) {
9195  std::ostringstream os;
9196  os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaits"
9197  << std::endl;
9198  std::cerr << os.str ();
9199  }
9200  }
9201  else { // constant number of packets per LID
9202  if (verbose) {
9203  std::ostringstream os;
9204  os << *verbosePrefix << "Reverse mode, constant # packets / LID"
9205  << std::endl;
9206  std::cerr << os.str ();
9207  }
9208  destMat->imports_.modify_host ();
9209  Teuchos::ArrayView<char> hostImports =
9210  getArrayViewFromDualView (destMat->imports_);
9211  // This is a legacy host pack/unpack path, so use the host
9212  // version of exports_.
9213  destMat->exports_.sync_host ();
9214  Teuchos::ArrayView<const char> hostExports =
9215  getArrayViewFromDualView (destMat->exports_);
9216  if (verbose) {
9217  std::ostringstream os;
9218  os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits"
9219  << std::endl;
9220  std::cerr << os.str ();
9221  }
9222  Distor.doReversePostsAndWaits (hostExports,
9223  constantNumPackets,
9224  hostImports);
9225  if (verbose) {
9226  std::ostringstream os;
9227  os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits"
9228  << std::endl;
9229  std::cerr << os.str ();
9230  }
9231  }
9232  }
9233  else { // forward mode (the default)
9234  if (constantNumPackets == 0) { // variable number of packets per LID
9235  if (verbose) {
9236  std::ostringstream os;
9237  os << *verbosePrefix << "Forward mode, variable # packets / LID"
9238  << std::endl;
9239  std::cerr << os.str ();
9240  }
9241  // Make sure that host has the latest version, since we're
9242  // using the version on host. If host has the latest
9243  // version, syncing to host does nothing.
9244  destMat->numExportPacketsPerLID_.sync_host ();
9245  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
9246  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
9247  destMat->numImportPacketsPerLID_.sync_host ();
9248  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
9249  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
9250  if (verbose) {
9251  std::ostringstream os;
9252  os << *verbosePrefix << "Calling 3-arg doPostsAndWaits"
9253  << std::endl;
9254  std::cerr << os.str ();
9255  }
9256  Distor.doPostsAndWaits (numExportPacketsPerLID, 1,
9257  numImportPacketsPerLID);
9258  if (verbose) {
9259  std::ostringstream os;
9260  os << *verbosePrefix << "Finished 3-arg doPostsAndWaits"
9261  << std::endl;
9262  std::cerr << os.str ();
9263  }
9264 
9265  size_t totalImportPackets = 0;
9266  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
9267  totalImportPackets += numImportPacketsPerLID[i];
9268  }
9269 
9270  // Reallocation MUST go before setting the modified flag,
9271  // because it may clear out the flags.
9272  destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
9273  verbosePrefix.get ());
9274  destMat->imports_.modify_host ();
9275  Teuchos::ArrayView<char> hostImports =
9276  getArrayViewFromDualView (destMat->imports_);
9277  // This is a legacy host pack/unpack path, so use the host
9278  // version of exports_.
9279  destMat->exports_.sync_host ();
9280  Teuchos::ArrayView<const char> hostExports =
9281  getArrayViewFromDualView (destMat->exports_);
9282  if (verbose) {
9283  std::ostringstream os;
9284  os << *verbosePrefix << "Calling 4-arg doPostsAndWaits"
9285  << std::endl;
9286  std::cerr << os.str ();
9287  }
9288  Distor.doPostsAndWaits (hostExports,
9289  numExportPacketsPerLID,
9290  hostImports,
9291  numImportPacketsPerLID);
9292  if (verbose) {
9293  std::ostringstream os;
9294  os << *verbosePrefix << "Finished 4-arg doPostsAndWaits"
9295  << std::endl;
9296  std::cerr << os.str ();
9297  }
9298  }
9299  else { // constant number of packets per LID
9300  if (verbose) {
9301  std::ostringstream os;
9302  os << *verbosePrefix << "Forward mode, constant # packets / LID"
9303  << std::endl;
9304  std::cerr << os.str ();
9305  }
9306  destMat->imports_.modify_host ();
9307  Teuchos::ArrayView<char> hostImports =
9308  getArrayViewFromDualView (destMat->imports_);
9309  // This is a legacy host pack/unpack path, so use the host
9310  // version of exports_.
9311  destMat->exports_.sync_host ();
9312  Teuchos::ArrayView<const char> hostExports =
9313  getArrayViewFromDualView (destMat->exports_);
9314  if (verbose) {
9315  std::ostringstream os;
9316  os << *verbosePrefix << "Calling 3-arg doPostsAndWaits"
9317  << std::endl;
9318  std::cerr << os.str ();
9319  }
9320  Distor.doPostsAndWaits (hostExports,
9321  constantNumPackets,
9322  hostImports);
9323  if (verbose) {
9324  std::ostringstream os;
9325  os << *verbosePrefix << "Finished 3-arg doPostsAndWaits"
9326  << std::endl;
9327  std::cerr << os.str ();
9328  }
9329  }
9330  }
9331  }
9332 
9333  /*********************************************************************/
9334  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
9335  /*********************************************************************/
9336 
9337  // Backwards compatibility measure. We'll use this again below.
9338  destMat->numImportPacketsPerLID_.sync_host ();
9339  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
9340  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
9341  destMat->imports_.sync_host ();
9342  Teuchos::ArrayView<const char> hostImports =
9343  getArrayViewFromDualView (destMat->imports_);
9344 
9345  if (verbose) {
9346  std::ostringstream os;
9347  os << *verbosePrefix << "Calling unpackAndCombineWithOwningPIDsCount"
9348  << std::endl;
9349  std::cerr << os.str ();
9350  }
9351  size_t mynnz =
9353  RemoteLIDs,
9354  hostImports,
9355  numImportPacketsPerLID,
9356  constantNumPackets,
9357  Distor,
9358  INSERT,
9359  NumSameIDs,
9360  PermuteToLIDs,
9361  PermuteFromLIDs);
9362  if (verbose) {
9363  std::ostringstream os;
9364  os << *verbosePrefix << "unpackAndCombineWithOwningPIDsCount returned "
9365  << mynnz << std::endl;
9366  std::cerr << os.str ();
9367  }
9368  size_t N = BaseRowMap->getNodeNumElements ();
9369 
9370  // Allocations
9371  ArrayRCP<size_t> CSR_rowptr(N+1);
9372  ArrayRCP<GO> CSR_colind_GID;
9373  ArrayRCP<LO> CSR_colind_LID;
9374  ArrayRCP<Scalar> CSR_vals;
9375  CSR_colind_GID.resize (mynnz);
9376  CSR_vals.resize (mynnz);
9377 
9378  // If LO and GO are the same, we can reuse memory when
9379  // converting the column indices from global to local indices.
9380  if (typeid (LO) == typeid (GO)) {
9381  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
9382  }
9383  else {
9384  CSR_colind_LID.resize (mynnz);
9385  }
9386 
9387  if (verbose) {
9388  std::ostringstream os;
9389  os << *verbosePrefix << "Calling unpackAndCombineIntoCrsArrays"
9390  << std::endl;
9391  std::cerr << os.str ();
9392  }
9393  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
9394  // unpackAndCombine method on a "CrsArrays" object? This passing
9395  // in a huge list of arrays is icky. Can't we have a bit of an
9396  // abstraction? Implementing a concrete DistObject subclass only
9397  // takes five methods.
9399  RemoteLIDs,
9400  hostImports,
9401  numImportPacketsPerLID,
9402  constantNumPackets,
9403  Distor,
9404  INSERT,
9405  NumSameIDs,
9406  PermuteToLIDs,
9407  PermuteFromLIDs,
9408  N,
9409  mynnz,
9410  MyPID,
9411  CSR_rowptr (),
9412  CSR_colind_GID (),
9413  Teuchos::av_reinterpret_cast<impl_scalar_type> (CSR_vals ()),
9414  SourcePids (),
9415  TargetPids);
9416 
9417  /**************************************************************/
9418  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
9419  /**************************************************************/
9420  // Call an optimized version of makeColMap that avoids the
9421  // Directory lookups (since the Import object knows who owns all
9422  // the GIDs).
9423  Teuchos::Array<int> RemotePids;
9424  if (verbose) {
9425  std::ostringstream os;
9426  os << *verbosePrefix << "Calling lowCommunicationMakeColMapAndReindex"
9427  << std::endl;
9428  std::cerr << os.str ();
9429  }
9430  Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
9431  CSR_colind_LID (),
9432  CSR_colind_GID (),
9433  BaseDomainMap,
9434  TargetPids,
9435  RemotePids,
9436  MyColMap);
9437 
9438  if (verbose) {
9439  std::ostringstream os;
9440  os << *verbosePrefix << "restrictComm="
9441  << (restrictComm ? "true" : "false") << std::endl;
9442  std::cerr << os.str ();
9443  }
9444 
9445  /*******************************************************/
9446  /**** 4) Second communicator restriction phase ****/
9447  /*******************************************************/
9448  if (restrictComm) {
9449  ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
9450  ReducedRowMap :
9451  MyColMap->replaceCommWithSubset (ReducedComm);
9452  MyColMap = ReducedColMap; // Reset the "my" maps
9453  }
9454 
9455  // Replace the col map
9456  if (verbose) {
9457  std::ostringstream os;
9458  os << *verbosePrefix << "Calling replaceColMap" << std::endl;
9459  std::cerr << os.str ();
9460  }
9461  destMat->replaceColMap (MyColMap);
9462 
9463  // Short circuit if the processor is no longer in the communicator
9464  //
9465  // NOTE: Epetra modifies all "removed" processes so they
9466  // have a dummy (serial) Map that doesn't touch the original
9467  // communicator. Duplicating that here might be a good idea.
9468  if (ReducedComm.is_null ()) {
9469  if (verbose) {
9470  std::ostringstream os;
9471  os << *verbosePrefix << "I am no longer in the communicator; "
9472  "returning" << std::endl;
9473  std::cerr << os.str ();
9474  }
9475  return;
9476  }
9477 
9478  /***************************************************/
9479  /**** 5) Sort ****/
9480  /***************************************************/
9481  if ((! reverseMode && xferAsImport != nullptr) ||
9482  (reverseMode && xferAsExport != nullptr)) {
9483  if (verbose) {
9484  std::ostringstream os;
9485  os << *verbosePrefix << "Calling sortCrsEntries" << endl;
9486  std::cerr << os.str ();
9487  }
9488  Import_Util::sortCrsEntries (CSR_rowptr (),
9489  CSR_colind_LID (),
9490  CSR_vals ());
9491  }
9492  else if ((! reverseMode && xferAsExport != nullptr) ||
9493  (reverseMode && xferAsImport != nullptr)) {
9494  if (verbose) {
9495  std::ostringstream os;
9496  os << *verbosePrefix << "Calling sortAndMergeCrsEntries"
9497  << endl;
9498  std::cerr << os.str();
9499  }
9500  Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
9501  CSR_colind_LID (),
9502  CSR_vals ());
9503  if (CSR_rowptr[N] != mynnz) {
9504  CSR_colind_LID.resize (CSR_rowptr[N]);
9505  CSR_vals.resize (CSR_rowptr[N]);
9506  }
9507  }
9508  else {
9509  TEUCHOS_TEST_FOR_EXCEPTION(
9510  true, std::logic_error, "Tpetra::CrsMatrix::"
9511  "transferAndFillComplete: Should never get here! "
9512  "Please report this bug to a Tpetra developer.");
9513  }
9514  /***************************************************/
9515  /**** 6) Reset the colmap and the arrays ****/
9516  /***************************************************/
9517 
9518  if (verbose) {
9519  std::ostringstream os;
9520  os << *verbosePrefix << "Calling destMat->setAllValues" << endl;
9521  std::cerr << os.str ();
9522  }
9523 
9524  // Call constructor for the new matrix (restricted as needed)
9525  //
9526  // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
9527  // refactor version of CrsMatrix, though it reserves the right to
9528  // make a deep copy of the arrays.
9529  destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
9530 
9531  /***************************************************/
9532  /**** 7) Build Importer & Call ESFC ****/
9533  /***************************************************/
9534  // Pre-build the importer using the existing PIDs
9535  Teuchos::ParameterList esfc_params;
9536 
9537  RCP<import_type> MyImport;
9538 
9539  // Fulfill the non-blocking allreduce on reduced_mismatch.
9540  if (iallreduceRequest.get () != nullptr) {
9541  if (verbose) {
9542  std::ostringstream os;
9543  os << *verbosePrefix << "Calling iallreduceRequest->wait()"
9544  << endl;
9545  std::cerr << os.str ();
9546  }
9547  iallreduceRequest->wait ();
9548  if (reduced_mismatch != 0) {
9549  isMM = false;
9550  }
9551  }
9552 
9553  if( isMM ) {
9554 #ifdef HAVE_TPETRA_MMM_TIMINGS
9555  Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string("isMM Block")));
9556 #endif
9557  // Combine all type1/2/3 lists, [filter them], then call the expert import constructor.
9558 
9559  if (verbose) {
9560  std::ostringstream os;
9561  os << *verbosePrefix << "Calling getAllValues" << endl;
9562  std::cerr << os.str ();
9563  }
9564 
9565  Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
9566  Teuchos::ArrayRCP<int> type3PIDs;
9567  Teuchos::ArrayRCP<const size_t> rowptr;
9568  Teuchos::ArrayRCP<const LO> colind;
9569  Teuchos::ArrayRCP<const Scalar> vals;
9570  {
9571 #ifdef HAVE_TPETRA_MMM_TIMINGS
9572  TimeMonitor tm_getAllValues (*TimeMonitor::getNewTimer(prefix + std::string("isMMgetAllValues")));
9573 #endif
9574  getAllValues(rowptr,colind,vals);
9575  }
9576 
9577  if (verbose) {
9578  std::ostringstream os;
9579  os << *verbosePrefix << "Calling reverseNeighborDiscovery" << std::endl;
9580  std::cerr << os.str ();
9581  }
9582 
9583  {
9584 #ifdef HAVE_TPETRA_MMM_TIMINGS
9585  TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string("isMMrevNeighDis")));
9586 #endif
9587  Import_Util::reverseNeighborDiscovery(*this,
9588  rowptr,
9589  colind,
9590  rowTransfer,
9591  MyImporter,
9592  MyDomainMap,
9593  type3PIDs,
9594  type3LIDs,
9595  ReducedComm);
9596  }
9597 
9598  if (verbose) {
9599  std::ostringstream os;
9600  os << *verbosePrefix << "Done with reverseNeighborDiscovery" << std::endl;
9601  std::cerr << os.str ();
9602  }
9603 
9604  Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
9605  Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportLIDs();
9606 
9607  Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs(); // row matrix
9608  Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
9609 
9610  const int numCols = getGraph()->getColMap()->getNodeNumElements(); // may be dup
9611  // from EpetraExt_MMHelpers.cpp: build_type2_exports
9612  std::vector<bool> IsOwned(numCols,true);
9613  std::vector<int> SentTo(numCols,-1);
9614  if (! MyImporter.is_null ()) {
9615  for (auto && rlid : MyImporter->getRemoteLIDs()) { // the remoteLIDs must be from sourcematrix
9616  IsOwned[rlid]=false;
9617  }
9618  }
9619 
9620  std::vector<std::pair<int,GO> > usrtg;
9621  usrtg.reserve(TEPID2.size());
9622 
9623  {
9624  const auto& colMap = * (this->getColMap ()); // *this is sourcematrix
9625  for (Array_size_type i = 0; i < TEPID2.size (); ++i) {
9626  const LO row = TELID2[i];
9627  const int pid = TEPID2[i];
9628  for (auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
9629  const int col = colind[j];
9630  if (IsOwned[col] && SentTo[col] != pid) {
9631  SentTo[col] = pid;
9632  GO gid = colMap.getGlobalElement (col);
9633  usrtg.push_back (std::pair<int,GO> (pid, gid));
9634  }
9635  }
9636  }
9637  }
9638 
9639 // This sort can _not_ be omitted.
9640  std::sort(usrtg.begin(),usrtg.end()); // default comparator does the right thing, now sorted in gid order
9641  auto eopg = std ::unique(usrtg.begin(),usrtg.end());
9642  // 25 Jul 2018: Could just ignore the entries at and after eopg.
9643  usrtg.erase(eopg,usrtg.end());
9644 
9645  const Array_size_type type2_us_size = usrtg.size();
9646  Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(new int[type2_us_size],0,type2_us_size,true);
9647  Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(new LO[type2_us_size],0,type2_us_size,true);
9648 
9649  int pos=0;
9650  for(auto && p : usrtg) {
9651  EPID2[pos]= p.first;
9652  ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
9653  pos++;
9654  }
9655 
9656  Teuchos::ArrayView<int> EPID3 = type3PIDs();
9657  Teuchos::ArrayView< LO> ELID3 = type3LIDs();
9658  GO InfGID = std::numeric_limits<GO>::max();
9659  int InfPID = INT_MAX;
9660 #ifdef TPETRA_MIN3
9661 # undef TPETRA_MIN3
9662 #endif // TPETRA_MIN3
9663 #define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
9664  int i1=0, i2=0, i3=0;
9665  int Len1 = EPID1.size();
9666  int Len2 = EPID2.size();
9667  int Len3 = EPID3.size();
9668 
9669  int MyLen=Len1+Len2+Len3;
9670  Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(new LO[MyLen],0,MyLen,true);
9671  Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(new int[MyLen],0,MyLen,true);
9672  int iloc = 0; // will be the size of the userExportLID/PIDs
9673 
9674  while(i1 < Len1 || i2 < Len2 || i3 < Len3){
9675  int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
9676  int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
9677  int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
9678 
9679  GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
9680  GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
9681  GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
9682 
9683  int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
9684  GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
9685 #ifdef TPETRA_MIN3
9686 # undef TPETRA_MIN3
9687 #endif // TPETRA_MIN3
9688  bool added_entry=false;
9689 
9690  if(PID1 == MIN_PID && GID1 == MIN_GID){
9691  userExportLIDs[iloc]=ELID1[i1];
9692  userExportPIDs[iloc]=EPID1[i1];
9693  i1++;
9694  added_entry=true;
9695  iloc++;
9696  }
9697  if(PID2 == MIN_PID && GID2 == MIN_GID){
9698  if(!added_entry) {
9699  userExportLIDs[iloc]=ELID2[i2];
9700  userExportPIDs[iloc]=EPID2[i2];
9701  added_entry=true;
9702  iloc++;
9703  }
9704  i2++;
9705  }
9706  if(PID3 == MIN_PID && GID3 == MIN_GID){
9707  if(!added_entry) {
9708  userExportLIDs[iloc]=ELID3[i3];
9709  userExportPIDs[iloc]=EPID3[i3];
9710  iloc++;
9711  }
9712  i3++;
9713  }
9714  }
9715 
9716  if (verbose) {
9717  std::ostringstream os;
9718  os << *verbosePrefix << "Create Import" << std::endl;
9719  std::cerr << os.str ();
9720  }
9721 
9722 #ifdef HAVE_TPETRA_MMM_TIMINGS
9723  auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string("isMMIportCtor")));
9724 #endif
9725  Teuchos::RCP<Teuchos::ParameterList> plist = rcp(new Teuchos::ParameterList());
9726  // 25 Jul 2018: Test for equality with the non-isMM path's Import object.
9727  if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9728  MyImport = rcp ( new import_type (MyDomainMap,
9729  MyColMap,
9730  RemotePids,
9731  userExportLIDs.view(0,iloc).getConst(),
9732  userExportPIDs.view(0,iloc).getConst(),
9733  plist)
9734  );
9735 
9736  if (verbose) {
9737  std::ostringstream os;
9738  os << *verbosePrefix << "Call expertStaticFillComplete" << std::endl;
9739  std::cerr << os.str ();
9740  }
9741 
9742  {
9743 #ifdef HAVE_TPETRA_MMM_TIMINGS
9744  TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string("isMM::destMat->eSFC")));
9745  esfc_params.set("Timer Label",label+std::string("isMM eSFC"));
9746 #endif
9747  if(!params.is_null())
9748  esfc_params.set("compute global constants",params->get("compute global constants",true));
9749  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(new Teuchos::ParameterList(esfc_params)));
9750 
9751  }
9752 
9753  } // if(isMM)
9754  else {
9755 #ifdef HAVE_TPETRA_MMM_TIMINGS
9756  TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string("TAFC notMMblock")));
9757 #endif
9758  if (verbose) {
9759  std::ostringstream os;
9760  os << *verbosePrefix << "Create Import" << std::endl;
9761  std::cerr << os.str ();
9762  }
9763 
9764 #ifdef HAVE_TPETRA_MMM_TIMINGS
9765  TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC notMMCreateImporter")));
9766 #endif
9767  Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(new Teuchos::ParameterList);
9768  mypars->set("Timer Label","notMMFrom_tAFC");
9769  if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
9770  MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
9771 
9772  if (verbose) {
9773  std::ostringstream os;
9774  os << *verbosePrefix << "Call expertStaticFillComplete" << endl;
9775  std::cerr << os.str ();
9776  }
9777 
9778 #ifdef HAVE_TPETRA_MMM_TIMINGS
9779  TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string("notMMdestMat->expertStaticFillComplete")));
9780  esfc_params.set("Timer Label",prefix+std::string("notMM eSFC"));
9781 #else
9782  esfc_params.set("Timer Label",std::string("notMM eSFC"));
9783 #endif
9784 
9785  if (!params.is_null ()) {
9786  esfc_params.set ("compute global constants",
9787  params->get ("compute global constants", true));
9788  }
9789  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
9790  MyImport, Teuchos::null,
9791  rcp (new Teuchos::ParameterList (esfc_params)));
9792  }
9793 
9794  if (verbose) {
9795  std::ostringstream os;
9796  os << *verbosePrefix << "Done" << endl;
9797  std::cerr << os.str ();
9798  }
9799  }
9800 
9801 
// importAndFillComplete, single-Import overload. The member summary in this
// listing describes it as: "Import from this to the given destination matrix,
// and make the result fill complete."
//
// The body only forwards to transferAndFillComplete, passing Teuchos::null in
// the argument slot that follows the row transfer object (no separate domain
// transfer is supplied by this overload).
//
// NOTE(review): the declarator lines carrying the function name and the
// destMatrix parameter (listing lines 9804-9805) are missing from this
// extracted listing -- confirm the full signature against the real header.
9802  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9803  void
9806  const import_type& importer,
9807  const Teuchos::RCP<const map_type>& domainMap,
9808  const Teuchos::RCP<const map_type>& rangeMap,
9809  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9810  {
9811  transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
9812  }
9813 
// Two-Import variant: takes a row Import and a separate domain Import and
// forwards both to transferAndFillComplete.
//
// domainImporter arrives by const reference and is wrapped with
// Teuchos::rcpFromRef before being forwarded.
// NOTE(review): rcpFromRef presumably yields a non-owning RCP, so the caller's
// domainImporter must outlive the call -- confirm against the Teuchos docs.
//
// NOTE(review): the declarator lines carrying the function name and the
// destMatrix parameter (listing lines 9816-9817) are missing from this
// extracted listing; presumably this is the importAndFillComplete overload --
// confirm against the real header.
9814  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9815  void
9818  const import_type& rowImporter,
9819  const import_type& domainImporter,
9820  const Teuchos::RCP<const map_type>& domainMap,
9821  const Teuchos::RCP<const map_type>& rangeMap,
9822  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9823  {
9824  transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
9825  }
9826 
// exportAndFillComplete, single-Export overload. The member summary in this
// listing describes it as: "Export from this to the given destination matrix,
// and make the result fill complete."
//
// The body only forwards to transferAndFillComplete, passing Teuchos::null in
// the argument slot that follows the row transfer object (no separate domain
// transfer is supplied by this overload).
//
// NOTE(review): the declarator lines carrying the function name and the
// destMatrix parameter (listing lines 9829-9830) are missing from this
// extracted listing -- confirm the full signature against the real header.
9827  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9828  void
9831  const export_type& exporter,
9832  const Teuchos::RCP<const map_type>& domainMap,
9833  const Teuchos::RCP<const map_type>& rangeMap,
9834  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9835  {
9836  transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
9837  }
9838 
// Two-Export variant: takes a row Export and a separate domain Export and
// forwards both to transferAndFillComplete.
//
// domainExporter arrives by const reference and is wrapped with
// Teuchos::rcpFromRef before being forwarded.
// NOTE(review): rcpFromRef presumably yields a non-owning RCP, so the caller's
// domainExporter must outlive the call -- confirm against the Teuchos docs.
//
// NOTE(review): the declarator lines carrying the function name and the
// destMatrix parameter (listing lines 9841-9842) are missing from this
// extracted listing; presumably this is the exportAndFillComplete overload --
// confirm against the real header.
9839  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
9840  void
9843  const export_type& rowExporter,
9844  const export_type& domainExporter,
9845  const Teuchos::RCP<const map_type>& domainMap,
9846  const Teuchos::RCP<const map_type>& rangeMap,
9847  const Teuchos::RCP<Teuchos::ParameterList>& params) const
9848  {
9849  transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
9850  }
9851 
9852 
9853 } // namespace Tpetra
9854 
9855 //
9856 // Explicit instantiation macro
9857 //
9858 // Must be expanded from within the Tpetra namespace!
9859 //
9860 
// Explicitly instantiates the class CrsMatrix<SCALAR, LO, GO, NODE> and its
// member template convert<SCALAR>() const (the same-Scalar conversion, which
// returns Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> >).
// The expansion uses the unqualified name CrsMatrix, so it has to appear where
// that name resolves (the comment above says: inside the Tpetra namespace).
9861 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
9862  \
9863  template class CrsMatrix< SCALAR , LO , GO , NODE >; \
9864  template Teuchos::RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
9865  CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
9866 
// Explicitly instantiates the member template convert<SO>() const of
// CrsMatrix<SI, LO, GO, NODE>, i.e. the conversion from a matrix with scalar
// type SI ("in") to one with scalar type SO ("out"); the instantiated function
// returns Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE> >.
// Note the parameter order: SO comes first, SI second.
9867 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
9868  \
9869  template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
9870  CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
9871 
// Emits a declaration of the nonmember function importAndFillCompleteCrsMatrix
// for CrsMatrix<SCALAR, LO, GO, NODE>, single-Import form. Parameters, in
// order: sourceMatrix, importer, domainMap, rangeMap, params. The Import and
// Map types are spelled through the matrix's own local_ordinal_type,
// global_ordinal_type and node_type typedefs. The return type is
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
//
// NOTE(review): the declaration is introduced with `template<>`, which in C++
// declares an explicit specialization rather than an explicit instantiation
// (that would be plain `template`). Confirm this is intended before changing
// it; the macro name says INSTANT.
9872 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9873  template<> \
9874  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9875  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9876  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9877  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9878  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
9879  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9880  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9881  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9882  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9883  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9884  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9885  const Teuchos::RCP<Teuchos::ParameterList>& params);
9886 
// Emits a declaration of the nonmember function importAndFillCompleteCrsMatrix
// for CrsMatrix<SCALAR, LO, GO, NODE>, two-Import form. Parameters, in order:
// sourceMatrix, rowImporter, domainImporter, domainMap, rangeMap, params. The
// Import and Map types are spelled through the matrix's own
// local_ordinal_type, global_ordinal_type and node_type typedefs. The return
// type is Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
//
// NOTE(review): the declaration is introduced with `template<>`, which in C++
// declares an explicit specialization rather than an explicit instantiation
// (that would be plain `template`). Confirm this is intended before changing
// it; the macro name says INSTANT.
9887 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9888  template<> \
9889  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9890  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9891  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9892  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9893  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
9894  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9895  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9896  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
9897  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9898  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9899  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9900  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9901  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9902  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9903  const Teuchos::RCP<Teuchos::ParameterList>& params);
9904 
9905 
// Emits a declaration of the nonmember function exportAndFillCompleteCrsMatrix
// for CrsMatrix<SCALAR, LO, GO, NODE>, single-Export form. Parameters, in
// order: sourceMatrix, exporter, domainMap, rangeMap, params. The Export and
// Map types are spelled through the matrix's own local_ordinal_type,
// global_ordinal_type and node_type typedefs. The return type is
// Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
//
// NOTE(review): the declaration is introduced with `template<>`, which in C++
// declares an explicit specialization rather than an explicit instantiation
// (that would be plain `template`). Confirm this is intended before changing
// it; the macro name says INSTANT.
9906 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9907  template<> \
9908  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9909  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9910  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9911  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9912  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
9913  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9914  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9915  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9916  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9917  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9918  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9919  const Teuchos::RCP<Teuchos::ParameterList>& params);
9920 
// Emits a declaration of the nonmember function exportAndFillCompleteCrsMatrix
// for CrsMatrix<SCALAR, LO, GO, NODE>, two-Export form. Parameters, in order:
// sourceMatrix, rowExporter, domainExporter, domainMap, rangeMap, params. The
// Export and Map types are spelled through the matrix's own
// local_ordinal_type, global_ordinal_type and node_type typedefs. The return
// type is Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
//
// NOTE(review): the declaration is introduced with `template<>`, which in C++
// declares an explicit specialization rather than an explicit instantiation
// (that would be plain `template`). Confirm this is intended before changing
// it; the macro name says INSTANT.
9921 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9922  template<> \
9923  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9924  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9925  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9926  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9927  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9928  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9929  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9930  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9931  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9932  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9933  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9934  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9935  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9936  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9937  const Teuchos::RCP<Teuchos::ParameterList>& params);
9938 
9939 
// Umbrella macro for one (SCALAR, LO, GO, NODE) combination. It expands, in
// this order, to: the class + convert<SCALAR> instantiation, the single-Import
// and single-Export importAndFillCompleteCrsMatrix /
// exportAndFillCompleteCrsMatrix declarations, and then their two-Import /
// two-Export counterparts.
// TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion.
9940 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9941  TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9942  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9943  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9944  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9945  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
9946 
9947 #endif // TPETRA_CRSMATRIX_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
dual_view_type::t_host getLocalViewHost() const
A local Kokkos::View of host memory.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Functor for the ABSMAX CombineMode of Import and Export operations.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
Abstract interface for local operators (e.g., matrices and preconditioners).
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
size_t getNodeNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual bool supportsRowViews() const override
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
LocalOrdinal getViewRaw(impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Nonconst pointer to all entries (including extra space) in the given row.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type > local_graph_type
The type of the part of the sparse graph on each MPI process.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
typename device_type::execution_space execution_space
The Kokkos execution space.
void getLocalRowCopy(LocalOrdinal localRow, const Teuchos::ArrayView< LocalOrdinal > &colInds, const Teuchos::ArrayView< Scalar > &vals, size_t &numEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
void clear_sync_state()
Clear "modified" flags on both host and device sides.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
size_t getNumVectors() const
Number of columns in the multivector.
size_t getLocalLength() const
Local number of rows on the calling process.
Declaration of a function that prints strings from each process.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
Traits class for packing / unpacking data of type T.
One or more distributed dense vectors.
size_t getNodeNumEntries() const override
The local number of entries in this matrix.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void scale(const Scalar &alpha)
Scale the matrix&#39;s values: this := alpha*this.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the left with the given Vector.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
bool isDistributed() const
Whether this is a globally distributed object.
void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< char *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode CM) override
Unpack the imported column indices and values, and combine into matrix.
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const override
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
static bool debug()
Whether Tpetra is in debug mode.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
Scalar scalar_type
The type of each entry in the matrix.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
void swap(CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &matrix)
Swaps the data from *this with the data and maps from crsMatrix.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
bool isGloballyIndexed() const override
Whether the graph&#39;s column indices are stored as global indices.
void leftScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Left-scale a KokkosSparse::CrsMatrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void globalAssemble()
Communicate nonlocal contributions to other processes.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const override
Get a copy of the diagonal entries of the matrix.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
std::shared_ptr< local_multiply_op_type > lclMatrix_
The local sparse matrix, wrapped in a multiply operator.
void gaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
"Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = A X$.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
bool isFillActive() const
Whether the matrix is not fill complete.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply() and gaussSeidel().
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular...
typename Kokkos::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
size_t global_size_t
Global size_t object.
bool hasTransposeApply() const override
Whether apply() allows applying the transpose or conjugate transpose.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
void clearGlobalConstants()
Clear matrix properties that require collectives.
void gaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
LocalOrdinal getLocalRowViewRaw(const LocalOrdinal lclRow, LocalOrdinal &numEnt, const LocalOrdinal *&lclColInds, const Scalar *&vals) const override
Get a constant, nonpersisting, locally indexed view of the given row of the matrix, using "raw" pointers instead of Teuchos::ArrayView.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
Insert new values that don't currently exist.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries...
local_matrix_type::values_type k_values1D_
Sparse matrix values, as part of compressed sparse row ("1-D") storage.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals) const
Replace one or more entries' values, using global indices.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
void packNew(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< char *, buffer_device_type > &exports, const Kokkos::DualView< size_t *, buffer_device_type > &numPacketsPerLID, size_t &constantNumPackets, Distributor &dist) const
Pack this object's data for an Import or Export.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Functions for manipulating CRS arrays.
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals) const
Replace one or more entries' values, using local row and column indices.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
mag_type frobNorm_
Cached Frobenius norm of the (global) matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object with the given verbosity level to the given output stream.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Utility functions for packing and unpacking sparse matrix entries.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const override
Implementation of RowMatrix::add: return alpha*A + beta*this.
bool fillComplete_
Whether the matrix is fill complete.
local_ordinal_type sumIntoLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals, const bool atomic=useAtomicUpdatesByDefault) const
Sum into one or more sparse matrix entries, using local row and column indices.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
typename Node::device_type device_type
The Kokkos device type.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process...
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
std::string description() const override
A one-line description of this object.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute a sparse matrix-MultiVector multiply.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
void computeGlobalConstants()
Compute matrix properties that require collectives.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void modify_host()
Mark data as modified on the host side.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row...
bool isLocallyComplete() const
Is this Export or Import locally complete?
local_matrix_type::values_type getLocalValuesView() const
Get the Kokkos local values.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
ProfileType getProfileType() const
Returns the profile type with which the matrix was allocated (e.g., whether it uses static data structures).
void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const override
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using local column indices.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
A parallel distribution of indices over processes.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Teuchos::ArrayView< const impl_scalar_type > getView(RowInfo rowinfo) const
Constant view of all entries (including extra space) in the given row.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
A read-only, row-oriented interface to a sparse matrix.
local_matrix_type getLocalMatrix() const
The local sparse matrix.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const override
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &x) override
Scale the matrix on the right with the given Vector.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
A distributed dense vector.
Declaration of Tpetra::Details::iallreduce.
void reduce()
Sum values of a locally replicated multivector across all processes.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
std::shared_ptr< CommRequest > iallreduce(const InputViewType &sendbuf, const OutputViewType &recvbuf, const ::Teuchos::EReductionType op, const ::Teuchos::Comm< int > &comm)
Nonblocking all-reduce, for either rank-1 or rank-0 Kokkos::View objects.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas, const bool verbose)
Allocate values (and optionally indices) using the Node.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process...
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
size_t mergeRowIndicesAndValues(crs_graph_type &graph, const RowInfo &rowInfo)
Merge duplicate row indices in the given row, along with their corresponding values.
dual_view_type::t_dev getLocalViewDevice() const
A local Kokkos::View of device memory.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
void reorderedGaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
Reordered "Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = A X$.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Teuchos::ArrayView< impl_scalar_type > getViewNonConst(const RowInfo &rowinfo) const
Nonconst view of all entries (including extra space) in the given row.
std::shared_ptr< local_multiply_op_type > getLocalMultiplyOperator() const
The local sparse matrix operator (a wrapper of getLocalMatrix() that supports local matrix-vector mul...
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
mag_type getFrobeniusNorm() const override
Compute and return the Frobenius norm of the matrix.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
void localApply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, const Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar &alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute the local part of a sparse matrix-(Multi)Vector multiply.
LocalOrdinal getViewRawConst(const impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Const pointer to all entries (including extra space) in the given row.
void rightScaleLocalCrsMatrix(const LocalSparseMatrixType &A_lcl, const ScalingFactorsViewType &scalingFactors, const bool assumeSymmetric, const bool divide=true)
Right-scale a KokkosSparse::CrsMatrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...
void sync_device()
Synchronize to Device.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.